6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
25 #include "xfrm_hash.h"
/* xfrm_nl is defined elsewhere in this file (not visible here); exported
 * for use by key-manager modules. */
28 EXPORT_SYMBOL(xfrm_nl);
/* Async-event timer interval, tunable via sysctl; default XFRM_AE_ETIME. */
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
/* Replay sequence-number delta that triggers an async event; sysctl-tunable. */
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
36 /* Each xfrm_state may be linked to two tables:
38    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
39    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
40       destination/tunnel endpoint. (output)
/* Protects the three hash tables and the counters below. */
43 static DEFINE_SPINLOCK(xfrm_state_lock);
45 /* Hash table to find appropriate SA towards given target (endpoint
46  * of tunnel or destination of transport mode) allowed by selector.
48  * Main use is finding SA after policy selected tunnel or transport mode.
49  * Also, it can be used by ah/esp icmp error handler to find offending SA.
/* Three hash tables indexing the same states by dst, src and SPI. */
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Bucket count minus one; doubles as the hash mask. */
54 static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on bucket count for the grow-on-collision logic. */
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of installed states; compared against hmask to decide growth. */
56 static unsigned int xfrm_state_num;
/* Generation counter bumped on insert (see __xfrm_state_insert). */
57 static unsigned int xfrm_state_genid;
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60 xfrm_address_t *saddr,
62 unsigned short family)
64 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68 xfrm_address_t *saddr,
69 unsigned short family)
71 return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
77 return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
80 static void xfrm_hash_transfer(struct hlist_head *list,
81 struct hlist_head *ndsttable,
82 struct hlist_head *nsrctable,
83 struct hlist_head *nspitable,
84 unsigned int nhashmask)
86 struct hlist_node *entry, *tmp;
89 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
92 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93 x->props.reqid, x->props.family,
95 hlist_add_head(&x->bydst, ndsttable+h);
97 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
100 hlist_add_head(&x->bysrc, nsrctable+h);
103 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104 x->id.proto, x->props.family,
106 hlist_add_head(&x->byspi, nspitable+h);
111 static unsigned long xfrm_hash_new_size(void)
113 return ((xfrm_state_hmask + 1) << 1) *
114 sizeof(struct hlist_head);
/* Serializes hash-table resize work items against each other. */
117 static DEFINE_MUTEX(hash_resize_mutex);
/* Workqueue handler: grow all three state hash tables to the next size.
 * Allocates the new tables outside the lock, transfers every chain under
 * xfrm_state_lock, then frees the old tables.  NOTE(review): the
 * allocation-failure branches (goto out_unlock on NULL) are elided in
 * this view -- the frees at lines 134/139-140 are their unwind paths.
 */
119 static void xfrm_hash_resize(struct work_struct *__unused)
121 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122 unsigned long nsize, osize;
123 unsigned int nhashmask, ohashmask;
126 mutex_lock(&hash_resize_mutex);
128 nsize = xfrm_hash_new_size();
129 ndst = xfrm_hash_alloc(nsize);
132 nsrc = xfrm_hash_alloc(nsize);
/* nsrc allocation failed: release ndst and bail (guard elided here). */
134 xfrm_hash_free(ndst, nsize);
137 nspi = xfrm_hash_alloc(nsize);
/* nspi allocation failed: release both earlier tables and bail. */
139 xfrm_hash_free(ndst, nsize);
140 xfrm_hash_free(nsrc, nsize);
/* Block writers while every chain is rehashed into the new tables. */
144 spin_lock_bh(&xfrm_state_lock);
146 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147 for (i = xfrm_state_hmask; i >= 0; i--)
148 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
/* Swap in the new tables, remembering the old ones for freeing. */
151 odst = xfrm_state_bydst;
152 osrc = xfrm_state_bysrc;
153 ospi = xfrm_state_byspi;
154 ohashmask = xfrm_state_hmask;
156 xfrm_state_bydst = ndst;
157 xfrm_state_bysrc = nsrc;
158 xfrm_state_byspi = nspi;
159 xfrm_state_hmask = nhashmask;
161 spin_unlock_bh(&xfrm_state_lock);
/* Old tables can be freed outside the lock; no reader sees them now. */
163 osize = (ohashmask + 1) * sizeof(struct hlist_head);
164 xfrm_hash_free(odst, osize);
165 xfrm_hash_free(osrc, osize);
166 xfrm_hash_free(ospi, osize);
169 mutex_unlock(&hash_resize_mutex);
/* Deferred-work handle that runs xfrm_hash_resize(). */
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
/* Wait queue woken on state-table changes; used by key managers. */
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
/* Protects the per-family afinfo registration table below. */
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
/* Garbage-collection work item, pending-destroy list and its lock. */
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
/* Forward declarations for routines defined later in this file. */
184 int __xfrm_state_delete(struct xfrm_state *x);
186 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
187 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
/* Final teardown of a dead state: stop its timers, release mode/type
 * modules and security context.  NOTE(review): the kfree() calls for
 * the state's algorithm/encap members and the NULL guards around
 * x->mode / x->type are elided in this view.
 */
189 static void xfrm_state_gc_destroy(struct xfrm_state *x)
/* del_timer_sync waits for a concurrently running handler to finish. */
191 del_timer_sync(&x->timer);
192 del_timer_sync(&x->rtimer);
199 xfrm_put_mode(x->mode);
/* Let the protocol type free its private data before dropping it. */
201 x->type->destructor(x);
202 xfrm_put_type(x->type);
204 security_xfrm_state_free(x);
/* Workqueue handler: snapshot the GC list under its lock, then destroy
 * every queued state outside the lock.  States reach this list via
 * __xfrm_state_destroy(), which reuses the bydst node for queueing.
 */
208 static void xfrm_state_gc_task(struct work_struct *data)
210 struct xfrm_state *x;
211 struct hlist_node *entry, *tmp;
212 struct hlist_head gc_list;
/* Steal the whole pending list in O(1) by moving its head pointer. */
214 spin_lock_bh(&xfrm_state_gc_lock);
215 gc_list.first = xfrm_state_gc_list.first;
216 INIT_HLIST_HEAD(&xfrm_state_gc_list);
217 spin_unlock_bh(&xfrm_state_gc_lock);
219 hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
220 xfrm_state_gc_destroy(x);
225 static inline unsigned long make_jiffies(long secs)
227 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
228 return MAX_SCHEDULE_TIMEOUT-1;
/* Per-state lifetime timer.  Computes the nearest soft/hard expiry from
 * the four (add/use x soft/hard) second-granularity limits, fires soft
 * expiry notifications, re-arms itself for the next deadline, and hard-
 * expires (deletes) the state when a hard limit passes.  NOTE(review):
 * the spin_lock(&x->lock), goto labels (expired/resched/out) and several
 * branch lines are elided in this view, so control flow between the
 * fragments below is partially reconstructed from context.
 */
233 static void xfrm_timer_handler(unsigned long data)
235 struct xfrm_state *x = (struct xfrm_state*)data;
/* Wall-clock seconds; lifetimes are tracked in seconds, not jiffies. */
236 unsigned long now = (unsigned long)xtime.tv_sec;
237 long next = LONG_MAX;
242 if (x->km.state == XFRM_STATE_DEAD)
244 if (x->km.state == XFRM_STATE_EXPIRED)
/* Hard add-time limit: expire regardless of use. */
246 if (x->lft.hard_add_expires_seconds) {
247 long tmo = x->lft.hard_add_expires_seconds +
248 x->curlft.add_time - now;
/* Hard use-time limit: measured from first use (or now if unused). */
254 if (x->lft.hard_use_expires_seconds) {
255 long tmo = x->lft.hard_use_expires_seconds +
256 (x->curlft.use_time ? : now) - now;
/* Soft limits only warn the key manager; state stays usable. */
264 if (x->lft.soft_add_expires_seconds) {
265 long tmo = x->lft.soft_add_expires_seconds +
266 x->curlft.add_time - now;
272 if (x->lft.soft_use_expires_seconds) {
273 long tmo = x->lft.soft_use_expires_seconds +
274 (x->curlft.use_time ? : now) - now;
/* Soft expiry: notify (hard=0) but keep the state alive. */
283 km_state_expired(x, 0, 0);
285 if (next != LONG_MAX)
286 mod_timer(&x->timer, jiffies + make_jiffies(next));
/* Hard expiry path: an ACQ state without an SPI just expires quietly. */
291 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
292 x->km.state = XFRM_STATE_EXPIRED;
298 err = __xfrm_state_delete(x);
299 if (!err && x->id.spi)
300 km_state_expired(x, 1, 0);
/* Audit the (attempted) deletion; err==0 means success. */
302 xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
303 AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
306 spin_unlock(&x->lock);
/* Forward declaration; defined near the replay-window helpers below. */
309 static void xfrm_replay_timer_handler(unsigned long data);
/* Allocate and minimally initialize a new xfrm_state.  The caller gets
 * the initial reference (refcnt == 1).  GFP_ATOMIC because this runs
 * from packet-processing/softirq context.  NOTE(review): the NULL check
 * around the initialization and the final `return x;` are elided in
 * this view.
 */
311 struct xfrm_state *xfrm_state_alloc(void)
313 struct xfrm_state *x;
/* kzalloc zeroes the struct, so unset fields default to 0/NULL. */
315 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
318 atomic_set(&x->refcnt, 1);
319 atomic_set(&x->tunnel_users, 0);
320 INIT_HLIST_NODE(&x->bydst);
321 INIT_HLIST_NODE(&x->bysrc);
322 INIT_HLIST_NODE(&x->byspi);
/* Lifetime timer and replay-notification timer, both keyed on x. */
323 init_timer(&x->timer);
324 x->timer.function = xfrm_timer_handler;
325 x->timer.data = (unsigned long)x;
326 init_timer(&x->rtimer);
327 x->rtimer.function = xfrm_replay_timer_handler;
328 x->rtimer.data = (unsigned long)x;
/* Record creation time in wall-clock seconds for lifetime accounting. */
329 x->curlft.add_time = (unsigned long)xtime.tv_sec;
/* Byte/packet limits default to "infinite". */
330 x->lft.soft_byte_limit = XFRM_INF;
331 x->lft.soft_packet_limit = XFRM_INF;
332 x->lft.hard_byte_limit = XFRM_INF;
333 x->lft.hard_packet_limit = XFRM_INF;
334 x->replay_maxage = 0;
335 x->replay_maxdiff = 0;
336 spin_lock_init(&x->lock);
340 EXPORT_SYMBOL(xfrm_state_alloc);
/* Called when the last reference is dropped: queue the (already dead)
 * state for deferred destruction.  The bydst hlist node is reused as
 * the GC-list link, which is safe because the state was unhashed in
 * __xfrm_state_delete().
 */
342 void __xfrm_state_destroy(struct xfrm_state *x)
/* Destroying a live state would be a refcounting bug. */
344 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
346 spin_lock_bh(&xfrm_state_gc_lock);
347 hlist_add_head(&x->bydst, &xfrm_state_gc_list);
348 spin_unlock_bh(&xfrm_state_gc_lock);
349 schedule_work(&xfrm_state_gc_work);
351 EXPORT_SYMBOL(__xfrm_state_destroy);
/* Mark a state dead and unhash it from all three tables.  Caller holds
 * x->lock.  Returns 0 on success, an error if the state was already
 * dead (the err variable and return are elided in this view).  The
 * reference dropped at the end is the creation reference from
 * xfrm_state_alloc(); the byspi unlink is guarded by x->id.spi != 0.
 */
353 int __xfrm_state_delete(struct xfrm_state *x)
357 if (x->km.state != XFRM_STATE_DEAD) {
358 x->km.state = XFRM_STATE_DEAD;
359 spin_lock(&xfrm_state_lock);
360 hlist_del(&x->bydst);
361 hlist_del(&x->bysrc);
363 hlist_del(&x->byspi);
365 spin_unlock(&xfrm_state_lock);
367 /* All xfrm_state objects are created by xfrm_state_alloc.
368  * The xfrm_state_alloc call gives a reference, and that
369  * is what we are dropping here.
377 EXPORT_SYMBOL(__xfrm_state_delete);
379 int xfrm_state_delete(struct xfrm_state *x)
383 spin_lock_bh(&x->lock);
384 err = __xfrm_state_delete(x);
385 spin_unlock_bh(&x->lock);
389 EXPORT_SYMBOL(xfrm_state_delete);
/* Delete (and audit) every non-kernel-owned state whose protocol matches
 * `proto`.  The table lock is dropped around each deletion because
 * xfrm_state_delete() takes x->lock and may sleep in audit; after
 * re-acquiring it the bucket scan restarts (the restart goto is elided
 * in this view).  NOTE(review): the xfrm_state_hold / xfrm_state_put and
 * `goto restart` lines are not visible here.
 */
391 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
396 spin_lock_bh(&xfrm_state_lock);
397 for (i = 0; i <= xfrm_state_hmask; i++) {
398 struct hlist_node *entry;
399 struct xfrm_state *x;
401 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
/* Skip states owned by the kernel (tunnel inner states). */
402 if (!xfrm_state_kern(x) &&
403 xfrm_id_proto_match(x->id.proto, proto)) {
405 spin_unlock_bh(&xfrm_state_lock);
407 err = xfrm_state_delete(x);
408 xfrm_audit_log(audit_info->loginuid,
410 AUDIT_MAC_IPSEC_DELSA,
411 err ? 0 : 1, NULL, x);
414 spin_lock_bh(&xfrm_state_lock);
419 spin_unlock_bh(&xfrm_state_lock);
422 EXPORT_SYMBOL(xfrm_state_flush);
/* Fill a temporary state's selector from the flow/template via the
 * address-family specific init_tempsel hook.  NOTE(review): the return
 * type line and the NULL-afinfo guard are elided in this view.
 */
425 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
426 struct xfrm_tmpl *tmpl,
427 xfrm_address_t *daddr, xfrm_address_t *saddr,
428 unsigned short family)
/* get_afinfo takes the afinfo read lock; put_afinfo releases it. */
430 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
433 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
434 xfrm_state_put_afinfo(afinfo);
/* Find a state by (daddr, spi, proto, family) in the byspi table.
 * Caller holds xfrm_state_lock.  NOTE(review): the spi comparison, the
 * switch on family for the address compare, the xfrm_state_hold() on
 * match and the final `return NULL;` are elided in this view.
 */
438 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
440 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
441 struct xfrm_state *x;
442 struct hlist_node *entry;
444 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
445 if (x->props.family != family ||
447 x->id.proto != proto)
/* AF_INET: compare the 32-bit v4 destination address. */
452 if (x->id.daddr.a4 != daddr->a4)
/* AF_INET6: full 128-bit destination compare. */
456 if (!ipv6_addr_equal((struct in6_addr *)daddr,
/* Find a state by (daddr, saddr, proto, family) in the bysrc table.
 * Caller holds xfrm_state_lock.  NOTE(review): the family switch, the
 * xfrm_state_hold() on match and the final `return NULL;` are elided
 * in this view.
 */
470 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
472 unsigned int h = xfrm_src_hash(daddr, saddr, family);
473 struct xfrm_state *x;
474 struct hlist_node *entry;
476 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
477 if (x->props.family != family ||
478 x->id.proto != proto)
/* AF_INET: both endpoints must match. */
483 if (x->id.daddr.a4 != daddr->a4 ||
484 x->props.saddr.a4 != saddr->a4)
/* AF_INET6: both endpoints must match. */
488 if (!ipv6_addr_equal((struct in6_addr *)daddr,
491 !ipv6_addr_equal((struct in6_addr *)saddr,
505 static inline struct xfrm_state *
506 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
509 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
510 x->id.proto, family);
512 return __xfrm_state_lookup_byaddr(&x->id.daddr,
514 x->id.proto, family);
517 static void xfrm_hash_grow_check(int have_hash_collision)
519 if (have_hash_collision &&
520 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
521 xfrm_state_num > xfrm_state_hmask)
522 schedule_work(&xfrm_hash_work);
/* Core SA resolution for output: find the best VALID state matching the
 * flow/template, or create a larval (ACQ) state and ask the key manager
 * to negotiate one.  Sets *err; returns the held state or NULL.
 * NOTE(review): the return-type line, several branch/label lines and
 * the reference-counting around `best`/`x` are elided in this view, so
 * control flow is partially reconstructed from context.
 */
526 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
527 struct flowi *fl, struct xfrm_tmpl *tmpl,
528 struct xfrm_policy *pol, int *err,
529 unsigned short family)
531 unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
532 struct hlist_node *entry;
533 struct xfrm_state *x, *x0;
534 int acquire_in_progress = 0;
536 struct xfrm_state *best = NULL;
538 spin_lock_bh(&xfrm_state_lock);
/* Scan the bydst bucket for template-compatible states. */
539 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
540 if (x->props.family == family &&
541 x->props.reqid == tmpl->reqid &&
542 !(x->props.flags & XFRM_STATE_WILDRECV) &&
543 xfrm_state_addr_check(x, daddr, saddr, family) &&
544 tmpl->mode == x->props.mode &&
545 tmpl->id.proto == x->id.proto &&
546 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
548 1. There is a valid state with matching selector.
550 2. Valid state with inappropriate selector. Skip.
552 Entering area of "sysdeps".
554 3. If state is not valid, selector is temporary,
555 it selects only session which triggered
556 previous resolution. Key manager will do
557 something to install a state with proper
/* Prefer the freshest non-dying VALID state. */
560 if (x->km.state == XFRM_STATE_VALID) {
561 if (!xfrm_selector_match(&x->sel, fl, family) ||
562 !security_xfrm_state_pol_flow_match(x, pol, fl))
565 best->km.dying > x->km.dying ||
566 (best->km.dying == x->km.dying &&
567 best->curlft.add_time < x->curlft.add_time))
/* A larval state already exists: don't start another acquire. */
569 } else if (x->km.state == XFRM_STATE_ACQ) {
570 acquire_in_progress = 1;
571 } else if (x->km.state == XFRM_STATE_ERROR ||
572 x->km.state == XFRM_STATE_EXPIRED) {
573 if (xfrm_selector_match(&x->sel, fl, family) &&
574 security_xfrm_state_pol_flow_match(x, pol, fl))
/* No usable state found: consider creating a larval ACQ state. */
581 if (!x && !error && !acquire_in_progress) {
/* A concrete SPI in the template that already exists is an error. */
583 (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
584 tmpl->id.proto, family)) != NULL) {
589 x = xfrm_state_alloc();
594 /* Initialize temporary selector matching only
595  * to current session. */
596 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
598 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
/* Security allocation failed: kill the embryonic state. */
600 x->km.state = XFRM_STATE_DEAD;
/* Ask registered key managers to negotiate a real SA. */
606 if (km_query(x, tmpl, pol) == 0) {
607 x->km.state = XFRM_STATE_ACQ;
608 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
609 h = xfrm_src_hash(daddr, saddr, family);
610 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
612 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
613 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
/* Larval states expire if the key manager never answers. */
615 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
616 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
617 add_timer(&x->timer);
619 xfrm_hash_grow_check(x->bydst.next != NULL);
/* km_query failed: discard the embryonic state. */
621 x->km.state = XFRM_STATE_DEAD;
/* An in-flight acquire maps to -EAGAIN for the caller. */
631 *err = acquire_in_progress ? -EAGAIN : error;
632 spin_unlock_bh(&xfrm_state_lock);
/* Link a fully-formed state into all three hash tables, arm its timers
 * and bump the generation counter.  Caller holds xfrm_state_lock.
 * NOTE(review): the `unsigned int h;` declaration, the `if (x->id.spi)`
 * guard before the byspi insert, the xfrm_state_hold/xfrm_state_num++
 * and wake_up(&km_waitq) lines are elided in this view.
 */
636 static void __xfrm_state_insert(struct xfrm_state *x)
640 x->genid = ++xfrm_state_genid;
642 h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
643 x->props.reqid, x->props.family);
644 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
646 h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
647 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
650 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
653 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
/* Kick the lifetime timer immediately to (re)compute expiries. */
656 mod_timer(&x->timer, jiffies + HZ);
657 if (x->replay_maxage)
658 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
/* Grow the table if this insert chained onto an existing entry. */
664 xfrm_hash_grow_check(x->bydst.next != NULL);
667 /* xfrm_state_lock is held */
/* Refresh the genid of every existing state that shares xnew's
 * (family, reqid, daddr, saddr) tuple, so stale cached users of those
 * states notice the change.
 */
668 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
670 unsigned short family = xnew->props.family;
671 u32 reqid = xnew->props.reqid;
672 struct xfrm_state *x;
673 struct hlist_node *entry;
676 h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
677 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
678 if (x->props.family == family &&
679 x->props.reqid == reqid &&
680 !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
681 !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
682 x->genid = xfrm_state_genid;
686 void xfrm_state_insert(struct xfrm_state *x)
688 spin_lock_bh(&xfrm_state_lock);
689 __xfrm_state_bump_genids(x);
690 __xfrm_state_insert(x);
691 spin_unlock_bh(&xfrm_state_lock);
693 EXPORT_SYMBOL(xfrm_state_insert);
695 /* xfrm_state_lock is held */
/* Find an existing larval (ACQ) state for the given tuple; when none
 * exists and `create` is set, allocate and hash a new one with a
 * host-specific temporary selector.  NOTE(review): the found-path
 * (xfrm_state_hold + return), the `if (!create) return NULL;` guard,
 * the family switch heads and closing braces are elided in this view.
 */
696 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
698 unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
699 struct hlist_node *entry;
700 struct xfrm_state *x;
/* Look for an existing larval state with the exact same tuple. */
702 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
703 if (x->props.reqid != reqid ||
704 x->props.mode != mode ||
705 x->props.family != family ||
706 x->km.state != XFRM_STATE_ACQ ||
712 if (x->id.daddr.a4 != daddr->a4 ||
713 x->props.saddr.a4 != saddr->a4)
717 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
718 (struct in6_addr *)daddr) ||
719 !ipv6_addr_equal((struct in6_addr *)
721 (struct in6_addr *)saddr))
/* Not found: build a fresh larval state (only if create != 0). */
733 x = xfrm_state_alloc();
/* AF_INET: selector pins the exact /32 endpoints of this flow. */
737 x->sel.daddr.a4 = daddr->a4;
738 x->sel.saddr.a4 = saddr->a4;
739 x->sel.prefixlen_d = 32;
740 x->sel.prefixlen_s = 32;
741 x->props.saddr.a4 = saddr->a4;
742 x->id.daddr.a4 = daddr->a4;
/* AF_INET6: selector pins the exact /128 endpoints. */
746 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
747 (struct in6_addr *)daddr);
748 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
749 (struct in6_addr *)saddr);
750 x->sel.prefixlen_d = 128;
751 x->sel.prefixlen_s = 128;
752 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
753 (struct in6_addr *)saddr);
754 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
755 (struct in6_addr *)daddr);
759 x->km.state = XFRM_STATE_ACQ;
761 x->props.family = family;
762 x->props.mode = mode;
763 x->props.reqid = reqid;
/* Larval states auto-expire if never completed by a key manager. */
764 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
766 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
767 add_timer(&x->timer);
768 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
769 h = xfrm_src_hash(daddr, saddr, family);
770 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
775 xfrm_hash_grow_check(x->bydst.next != NULL);
/* Forward declaration; defined after the lookup helpers below. */
781 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
/* Add a state negotiated by a key manager.  Fails with -EEXIST when an
 * identical state is already installed; otherwise replaces any matching
 * larval (ACQ) state found by sequence number or tuple, inserts the new
 * state and deletes the larval one.  NOTE(review): the error assignments
 * (-EEXIST/-ESRCH), `goto out` labels and the x1 put/delete ordering are
 * elided in this view.
 */
783 int xfrm_state_add(struct xfrm_state *x)
785 struct xfrm_state *x1;
788 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
790 family = x->props.family;
792 spin_lock_bh(&xfrm_state_lock);
/* Duplicate check: same SPI (or address pair) already installed? */
794 x1 = __xfrm_state_locate(x, use_spi, family);
/* Match the larval ACQ state this add answers, by KM sequence number. */
802 if (use_spi && x->km.seq) {
803 x1 = __xfrm_find_acq_byseq(x->km.seq);
/* Sequence matched a different destination: ignore that larval state. */
804 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
/* Fall back to a tuple search for the larval state (create=0). */
811 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
813 &x->id.daddr, &x->props.saddr, 0);
815 __xfrm_state_bump_genids(x);
816 __xfrm_state_insert(x);
820 spin_unlock_bh(&xfrm_state_lock);
/* Retire the larval state the new SA replaces. */
823 xfrm_state_delete(x1);
829 EXPORT_SYMBOL(xfrm_state_add);
/* Update an existing state in place (encap/coaddr/selector/lifetimes)
 * or, if the match is still larval (ACQ), install x as its replacement.
 * Returns -ESRCH when no matching state exists, -EEXIST for a
 * kernel-owned match (those error assignments and `goto out` labels
 * are elided in this view).
 */
831 int xfrm_state_update(struct xfrm_state *x)
833 struct xfrm_state *x1;
835 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
837 spin_lock_bh(&xfrm_state_lock);
838 x1 = __xfrm_state_locate(x, use_spi, x->props.family);
/* Kernel-owned (tunnel-internal) states must not be updated from here. */
844 if (xfrm_state_kern(x1)) {
/* Larval match: install the new state; the larval one is deleted below. */
850 if (x1->km.state == XFRM_STATE_ACQ) {
851 __xfrm_state_insert(x);
857 spin_unlock_bh(&xfrm_state_lock);
863 xfrm_state_delete(x1);
/* In-place update path: copy mutable fields into the live state x1. */
869 spin_lock_bh(&x1->lock);
870 if (likely(x1->km.state == XFRM_STATE_VALID)) {
871 if (x->encap && x1->encap)
872 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
873 if (x->coaddr && x1->coaddr) {
874 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
/* Selector only changes for non-SPI protocols (e.g. IPcomp-like). */
876 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
877 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
878 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
/* Re-arm the timer so new lifetimes take effect promptly. */
881 mod_timer(&x1->timer, jiffies + HZ);
882 if (x1->curlft.use_time)
883 xfrm_state_check_expire(x1);
887 spin_unlock_bh(&x1->lock);
893 EXPORT_SYMBOL(xfrm_state_update);
/* Check byte/packet lifetime limits on the datapath.  Stamps first-use
 * time, hard-expires the state (via its timer) when a hard limit is hit,
 * and sends a soft-expiry notification when a soft limit is crossed.
 * NOTE(review): the return statements (-EINVAL on non-VALID, -EEXPIRED
 * on hard limit, 0 otherwise) and the km.dying guard before the soft
 * notification are elided in this view.
 */
895 int xfrm_state_check_expire(struct xfrm_state *x)
897 if (!x->curlft.use_time)
898 x->curlft.use_time = (unsigned long)xtime.tv_sec;
900 if (x->km.state != XFRM_STATE_VALID)
/* Hard limit reached: mark expired and fire the timer immediately. */
903 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
904 x->curlft.packets >= x->lft.hard_packet_limit) {
905 x->km.state = XFRM_STATE_EXPIRED;
906 mod_timer(&x->timer, jiffies);
/* Soft limit reached: warn the key manager (hard=0), keep state usable. */
911 (x->curlft.bytes >= x->lft.soft_byte_limit ||
912 x->curlft.packets >= x->lft.soft_packet_limit)) {
914 km_state_expired(x, 0, 0);
918 EXPORT_SYMBOL(xfrm_state_check_expire);
/* Ensure the skb has enough headroom for this state's transform header
 * plus the output device's link-layer reserve; expand the head if not.
 * NOTE(review): the `if (nhead > 0)` guard and `return 0;` are elided
 * in this view.
 */
920 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
922 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
926 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
928 /* Check tail too... */
/* Datapath check before transforming a packet: lifetime limits first,
 * then headroom.  NOTE(review): the early-return on expire error and the
 * final `return err;` are elided in this view.
 */
932 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
934 int err = xfrm_state_check_expire(x);
937 err = xfrm_state_check_space(x, skb);
941 EXPORT_SYMBOL(xfrm_state_check);
944 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
945 unsigned short family)
947 struct xfrm_state *x;
949 spin_lock_bh(&xfrm_state_lock);
950 x = __xfrm_state_lookup(daddr, spi, proto, family);
951 spin_unlock_bh(&xfrm_state_lock);
954 EXPORT_SYMBOL(xfrm_state_lookup);
957 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
958 u8 proto, unsigned short family)
960 struct xfrm_state *x;
962 spin_lock_bh(&xfrm_state_lock);
963 x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
964 spin_unlock_bh(&xfrm_state_lock);
967 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
970 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
971 xfrm_address_t *daddr, xfrm_address_t *saddr,
972 int create, unsigned short family)
974 struct xfrm_state *x;
976 spin_lock_bh(&xfrm_state_lock);
977 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
978 spin_unlock_bh(&xfrm_state_lock);
982 EXPORT_SYMBOL(xfrm_find_acq);
984 #ifdef CONFIG_XFRM_SUB_POLICY
/* Sort templates per the address family's ordering rules (sub-policy
 * support).  NOTE(review): the return-type line, `int err = 0;` and the
 * `return err;` are elided in this view.
 */
986 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
987 unsigned short family)
990 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
992 return -EAFNOSUPPORT;
/* The hook is optional; families without it leave dst untouched. */
994 spin_lock_bh(&xfrm_state_lock);
995 if (afinfo->tmpl_sort)
996 err = afinfo->tmpl_sort(dst, src, n);
997 spin_unlock_bh(&xfrm_state_lock);
998 xfrm_state_put_afinfo(afinfo);
1001 EXPORT_SYMBOL(xfrm_tmpl_sort);
/* Sort states per the address family's ordering rules (sub-policy
 * support); mirrors xfrm_tmpl_sort above.  NOTE(review): return-type
 * line, `int err = 0;` and `return err;` are elided in this view.
 */
1004 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1005 unsigned short family)
1008 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1010 return -EAFNOSUPPORT;
1012 spin_lock_bh(&xfrm_state_lock);
1013 if (afinfo->state_sort)
1014 err = afinfo->state_sort(dst, src, n);
1015 spin_unlock_bh(&xfrm_state_lock);
1016 xfrm_state_put_afinfo(afinfo);
1019 EXPORT_SYMBOL(xfrm_state_sort);
1022 /* Silly enough, but I'm lazy to build resolution list */
/* Linear scan of every bydst bucket for a larval (ACQ) state with the
 * given key-manager sequence number.  Caller holds xfrm_state_lock.
 * NOTE(review): the hold-and-return on match and the final
 * `return NULL;` are elided in this view.
 */
1024 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1028 for (i = 0; i <= xfrm_state_hmask; i++) {
1029 struct hlist_node *entry;
1030 struct xfrm_state *x;
1032 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1033 if (x->km.seq == seq &&
1034 x->km.state == XFRM_STATE_ACQ) {
1043 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1045 struct xfrm_state *x;
1047 spin_lock_bh(&xfrm_state_lock);
1048 x = __xfrm_find_acq_byseq(seq);
1049 spin_unlock_bh(&xfrm_state_lock);
1052 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1054 u32 xfrm_get_acqseq(void)
1058 static DEFINE_SPINLOCK(acqseq_lock);
1060 spin_lock_bh(&acqseq_lock);
1061 res = (++acqseq ? : ++acqseq);
1062 spin_unlock_bh(&acqseq_lock);
1065 EXPORT_SYMBOL(xfrm_get_acqseq);
/* Assign an unused SPI to state x from [minspi, maxspi] (random probing
 * when the range is non-trivial) and hash it into the byspi table.
 * NOTE(review): the return-type line, the early-out when x->id.spi is
 * already set, the x0 put on collision and the final SPI-assigned check
 * are elided in this view.
 */
1068 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1071 struct xfrm_state *x0;
/* Degenerate range: try exactly one candidate SPI. */
1076 if (minspi == maxspi) {
1077 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
/* Otherwise probe up to range-size random candidates for a free SPI. */
1085 u32 low = ntohl(minspi);
1086 u32 high = ntohl(maxspi);
1087 for (h=0; h<high-low+1; h++) {
1088 spi = low + net_random()%(high-low+1);
1089 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
/* A NULL lookup means the SPI is free; claim it. */
1091 x->id.spi = htonl(spi);
/* Publish the chosen SPI in the byspi hash table. */
1098 spin_lock_bh(&xfrm_state_lock);
1099 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1100 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1101 spin_unlock_bh(&xfrm_state_lock);
1105 EXPORT_SYMBOL(xfrm_alloc_spi);
/* Iterate every state matching `proto`, invoking func(state, count,
 * data) with a running count, and a final call with count 0 for the
 * last entry.  Stops on the first non-zero return.  NOTE(review): the
 * count/err locals, the `last = x; count++;` bookkeeping and the error
 * short-circuit are elided in this view.
 */
1107 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1111 struct xfrm_state *x, *last = NULL;
1112 struct hlist_node *entry;
1116 spin_lock_bh(&xfrm_state_lock);
1117 for (i = 0; i <= xfrm_state_hmask; i++) {
1118 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1119 if (!xfrm_id_proto_match(x->id.proto, proto))
/* Callback runs one entry behind so the final call can use count 0. */
1122 err = func(last, count, data);
/* Flush the trailing entry with the count-0 sentinel call. */
1134 err = func(last, 0, data);
1136 spin_unlock_bh(&xfrm_state_lock);
1139 EXPORT_SYMBOL(xfrm_state_walk);
/* Emit an XFRM_MSG_NEWAE event carrying the state's replay counters.
 * Events are rate-limited: updates below replay_maxdiff defer to the
 * replay timer, and a timeout with no counter movement only sets
 * XFRM_TIME_DEFER.  NOTE(review): the `struct km_event c;` declaration,
 * the switch statement head/braces and several `break`/`return` lines
 * are elided in this view.
 */
1142 void xfrm_replay_notify(struct xfrm_state *x, int event)
1145 /* we send notify messages in case
1146  * 1. we updated on of the sequence numbers, and the seqno difference
1147  * is at least x->replay_maxdiff, in this case we also update the
1148  * timeout of our timer function
1149  * 2. if x->replay_maxage has elapsed since last update,
1150  * and there were changes
1152  * The state structure must be locked!
/* Small counter movement: defer to the timer unless one is pending. */
1156 case XFRM_REPLAY_UPDATE:
1157 if (x->replay_maxdiff &&
1158 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1159 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1160 if (x->xflags & XFRM_TIME_DEFER)
1161 event = XFRM_REPLAY_TIMEOUT;
/* Timer fired but nothing changed since last event: just re-defer. */
1168 case XFRM_REPLAY_TIMEOUT:
1169 if ((x->replay.seq == x->preplay.seq) &&
1170 (x->replay.bitmap == x->preplay.bitmap) &&
1171 (x->replay.oseq == x->preplay.oseq)) {
1172 x->xflags |= XFRM_TIME_DEFER;
/* Snapshot counters so the next comparison sees this baseline. */
1179 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1180 c.event = XFRM_MSG_NEWAE;
1181 c.data.aevent = event;
1182 km_state_notify(x, &c);
/* Re-arm the aging timer; clear the defer flag if it was armed. */
1184 if (x->replay_maxage &&
1185 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1186 x->xflags &= ~XFRM_TIME_DEFER;
1188 EXPORT_SYMBOL(xfrm_replay_notify);
1190 static void xfrm_replay_timer_handler(unsigned long data)
1192 struct xfrm_state *x = (struct xfrm_state*)data;
1194 spin_lock(&x->lock);
1196 if (x->km.state == XFRM_STATE_VALID) {
1197 if (xfrm_aevent_is_on())
1198 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1200 x->xflags |= XFRM_TIME_DEFER;
1203 spin_unlock(&x->lock);
1206 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1209 u32 seq = ntohl(net_seq);
1211 if (unlikely(seq == 0))
1214 if (likely(seq > x->replay.seq))
1217 diff = x->replay.seq - seq;
1218 if (diff >= x->props.replay_window) {
1219 x->stats.replay_window++;
1223 if (x->replay.bitmap & (1U << diff)) {
1229 EXPORT_SYMBOL(xfrm_replay_check);
1231 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1234 u32 seq = ntohl(net_seq);
1236 if (seq > x->replay.seq) {
1237 diff = seq - x->replay.seq;
1238 if (diff < x->props.replay_window)
1239 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1241 x->replay.bitmap = 1;
1242 x->replay.seq = seq;
1244 diff = x->replay.seq - seq;
1245 x->replay.bitmap |= (1U << diff);
1248 if (xfrm_aevent_is_on())
1249 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1251 EXPORT_SYMBOL(xfrm_replay_advance);
/* Registered key managers and the rwlock guarding the list. */
1253 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1254 static DEFINE_RWLOCK(xfrm_km_lock);
1256 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1258 struct xfrm_mgr *km;
1260 read_lock(&xfrm_km_lock);
1261 list_for_each_entry(km, &xfrm_km_list, list)
1262 if (km->notify_policy)
1263 km->notify_policy(xp, dir, c);
1264 read_unlock(&xfrm_km_lock);
1267 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1269 struct xfrm_mgr *km;
1270 read_lock(&xfrm_km_lock);
1271 list_for_each_entry(km, &xfrm_km_list, list)
1274 read_unlock(&xfrm_km_lock);
1277 EXPORT_SYMBOL(km_policy_notify);
1278 EXPORT_SYMBOL(km_state_notify);
/* Tell key managers a state soft- or hard-expired (hard != 0) and wake
 * any waiter on a hard expiry.  NOTE(review): the km_event declaration,
 * the c.data/portid assignments and the wake_up(&km_waitq) on hard
 * expiry are elided in this view.
 */
1280 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1286 c.event = XFRM_MSG_EXPIRE;
1287 km_state_notify(x, &c);
1293 EXPORT_SYMBOL(km_state_expired);
1295  * We send to all registered managers regardless of failure
1296  * We are happy with one success
/* Ask every key manager to negotiate an SA for (x, tmpl, pol).  Returns
 * 0 as soon as any manager accepted, else -EINVAL.  NOTE(review): the
 * per-iteration success check on acqret and `return err;` are elided in
 * this view.
 */
1298 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1300 int err = -EINVAL, acqret;
1301 struct xfrm_mgr *km;
1303 read_lock(&xfrm_km_lock);
1304 list_for_each_entry(km, &xfrm_km_list, list) {
1305 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1309 read_unlock(&xfrm_km_lock);
1312 EXPORT_SYMBOL(km_query);
/* Report a NAT-T address/port remapping to key managers that support
 * the new_mapping hook; first success ends the loop.  NOTE(review): the
 * `int err = -EINVAL;` initializer, the success break and `return err;`
 * are elided in this view.
 */
1314 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1317 struct xfrm_mgr *km;
1319 read_lock(&xfrm_km_lock);
1320 list_for_each_entry(km, &xfrm_km_list, list) {
1321 if (km->new_mapping)
1322 err = km->new_mapping(x, ipaddr, sport);
1326 read_unlock(&xfrm_km_lock);
1329 EXPORT_SYMBOL(km_new_mapping);
/* Tell key managers a policy soft- or hard-expired.  NOTE(review): the
 * km_event declaration, c.data/hard/portid assignments and the
 * wake_up(&km_waitq) on hard expiry are elided in this view.
 */
1331 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1337 c.event = XFRM_MSG_POLEXPIRE;
1338 km_policy_notify(pol, dir, &c);
1343 EXPORT_SYMBOL(km_policy_expired);
/* Forward a report event (proto/selector/address) to key managers that
 * implement the report hook; first success ends the loop.  NOTE(review):
 * the err/ret locals, the `if (km->report)` guard, success break and
 * `return err;` are elided in this view.
 */
1345 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1349 struct xfrm_mgr *km;
1351 read_lock(&xfrm_km_lock);
1352 list_for_each_entry(km, &xfrm_km_list, list) {
1354 ret = km->report(proto, sel, addr);
1359 read_unlock(&xfrm_km_lock);
1362 EXPORT_SYMBOL(km_report);
/* setsockopt() path for per-socket policies: copy the userspace blob,
 * let each key manager try to compile it into an xfrm_policy, then
 * insert the result on the socket.  NOTE(review): the err local, the
 * -EMSGSIZE/-ENOMEM/-EFAULT assignments, the compile-success break,
 * the xfrm_pol_put and kfree(data) cleanup are elided in this view.
 */
1364 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1368 struct xfrm_mgr *km;
1369 struct xfrm_policy *pol = NULL;
/* Reject empty or oversized option blobs before allocating. */
1371 if (optlen <= 0 || optlen > PAGE_SIZE)
1374 data = kmalloc(optlen, GFP_KERNEL);
1379 if (copy_from_user(data, optval, optlen))
/* First key manager whose compile_policy accepts the blob wins. */
1383 read_lock(&xfrm_km_lock);
1384 list_for_each_entry(km, &xfrm_km_list, list) {
1385 pol = km->compile_policy(sk, optname, data,
1390 read_unlock(&xfrm_km_lock);
1393 xfrm_sk_policy_insert(sk, err, pol);
1402 EXPORT_SYMBOL(xfrm_user_policy);
1404 int xfrm_register_km(struct xfrm_mgr *km)
1406 write_lock_bh(&xfrm_km_lock);
1407 list_add_tail(&km->list, &xfrm_km_list);
1408 write_unlock_bh(&xfrm_km_lock);
1411 EXPORT_SYMBOL(xfrm_register_km);
1413 int xfrm_unregister_km(struct xfrm_mgr *km)
1415 write_lock_bh(&xfrm_km_lock);
1416 list_del(&km->list);
1417 write_unlock_bh(&xfrm_km_lock);
1420 EXPORT_SYMBOL(xfrm_unregister_km);
/* Register per-address-family state operations.  Fails for NULL or
 * out-of-range family; NOTE(review): the `int err = 0;`, the -EEXIST
 * assignment when the slot is taken and the `return err;` are elided
 * in this view.
 */
1422 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1425 if (unlikely(afinfo == NULL))
1427 if (unlikely(afinfo->family >= NPROTO))
1428 return -EAFNOSUPPORT;
1429 write_lock_bh(&xfrm_state_afinfo_lock);
/* Refuse to overwrite an already-registered family. */
1430 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1433 xfrm_state_afinfo[afinfo->family] = afinfo;
1434 write_unlock_bh(&xfrm_state_afinfo_lock);
1437 EXPORT_SYMBOL(xfrm_state_register_afinfo);
/* Unregister per-address-family state operations; only the currently
 * registered afinfo for that family may remove itself.  NOTE(review):
 * the err local, the -EINVAL assignment on mismatch and `return err;`
 * are elided in this view.
 */
1439 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1442 if (unlikely(afinfo == NULL))
1444 if (unlikely(afinfo->family >= NPROTO))
1445 return -EAFNOSUPPORT;
1446 write_lock_bh(&xfrm_state_afinfo_lock);
1447 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
/* A different afinfo occupying the slot is a caller bug. */
1448 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1451 xfrm_state_afinfo[afinfo->family] = NULL;
1453 write_unlock_bh(&xfrm_state_afinfo_lock);
1456 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
/* Look up the per-family ops, returning with the afinfo read lock HELD
 * on success -- the caller must release it via xfrm_state_put_afinfo().
 * NOTE(review): the `return NULL;` branches and final `return afinfo;`
 * are elided in this view.
 */
1458 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1460 struct xfrm_state_afinfo *afinfo;
1461 if (unlikely(family >= NPROTO))
1463 read_lock(&xfrm_state_afinfo_lock);
1464 afinfo = xfrm_state_afinfo[family];
/* Drop the lock only on failure; success keeps it for the caller. */
1465 if (unlikely(!afinfo))
1466 read_unlock(&xfrm_state_afinfo_lock);
1470 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1472 read_unlock(&xfrm_state_afinfo_lock);
1475 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1476 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1478 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop a state's reference on its inner tunnel state, deleting the
 * tunnel state when we hold its last user reference (count of 2: the
 * tunnel's own reference plus ours).  NOTE(review): the `if (x->tunnel)`
 * guard and the `x->tunnel = NULL;` reset are elided in this view.
 */
1479 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1482 struct xfrm_state *t = x->tunnel;
1484 if (atomic_read(&t->tunnel_users) == 2)
1485 xfrm_state_delete(t);
1486 atomic_dec(&t->tunnel_users);
1491 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1494  * This function is NOT optimal. For example, with ESP it will give an
1495  * MTU that's usually two bytes short of being optimal. However, it will
1496  * usually give an answer that's a multiple of 4 provided the input is
1497  * also a multiple of 4.
/* Compute the payload MTU available inside this transform: subtract the
 * header, let the type's get_max_size round per-protocol, and re-add
 * the header.  NOTE(review): the res/m locals' declarations, the
 * iteration/comparison between res and m, and the `return res;` are
 * elided in this view.
 */
1499 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1503 res -= x->props.header_len;
/* The type hook is only trusted while the state is VALID. */
1511 spin_lock_bh(&x->lock);
1512 if (x->km.state == XFRM_STATE_VALID &&
1513 x->type && x->type->get_max_size)
1514 m = x->type->get_max_size(x, m);
1516 m += x->props.header_len;
1517 spin_unlock_bh(&x->lock);
/* Finish constructing a state: apply family init_flags, bind the
 * protocol type and mode modules, run the type's init, then mark the
 * state VALID.  NOTE(review): the `goto error` branches after each
 * failing step and the final `return err;` / error label are elided in
 * this view.
 */
1527 int xfrm_init_state(struct xfrm_state *x)
1529 struct xfrm_state_afinfo *afinfo;
1530 int family = x->props.family;
1533 err = -EAFNOSUPPORT;
1534 afinfo = xfrm_state_get_afinfo(family);
/* Optional per-family flag validation/initialization. */
1539 if (afinfo->init_flags)
1540 err = afinfo->init_flags(x);
1542 xfrm_state_put_afinfo(afinfo);
/* Bind the protocol (AH/ESP/IPcomp...) implementation module. */
1547 err = -EPROTONOSUPPORT;
1548 x->type = xfrm_get_type(x->id.proto, family);
1549 if (x->type == NULL)
1552 err = x->type->init_state(x);
/* Bind the encapsulation mode (transport/tunnel/...) module. */
1556 x->mode = xfrm_get_mode(x->props.mode, family);
1557 if (x->mode == NULL)
1560 x->km.state = XFRM_STATE_VALID;
1566 EXPORT_SYMBOL(xfrm_init_state);
/* Boot-time initialization: allocate the three state hash tables with
 * 8 buckets each (they grow on demand via xfrm_hash_work) and set up
 * the garbage-collection work item.
 */
1568 void __init xfrm_state_init(void)
/* Initial size: 8 buckets worth of hlist heads. */
1572 sz = sizeof(struct hlist_head) * 8;
1574 xfrm_state_bydst = xfrm_hash_alloc(sz);
1575 xfrm_state_bysrc = xfrm_hash_alloc(sz);
1576 xfrm_state_byspi = xfrm_hash_alloc(sz);
/* These tables are mandatory; without them xfrm cannot function. */
1577 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1578 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1579 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1581 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);