/*
 * net/ipv4/netfilter/ip_queue.c
 * [NETFILTER]: ip_queue: deobfuscate entry lookups
 */
1 /*
2  * This is a module which is used for queueing IPv4 packets and
3  * communicating with userspace via netlink.
4  *
5  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6  * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 #include <linux/module.h>
13 #include <linux/skbuff.h>
14 #include <linux/init.h>
15 #include <linux/ip.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/netfilter.h>
19 #include <linux/netfilter_ipv4/ip_queue.h>
20 #include <linux/netfilter_ipv4/ip_tables.h>
21 #include <linux/netlink.h>
22 #include <linux/spinlock.h>
23 #include <linux/sysctl.h>
24 #include <linux/proc_fs.h>
25 #include <linux/seq_file.h>
26 #include <linux/security.h>
27 #include <linux/mutex.h>
28 #include <net/net_namespace.h>
29 #include <net/sock.h>
30 #include <net/route.h>
31 #include <net/netfilter/nf_queue.h>
32
/* Default cap on the number of packets queued awaiting a userspace verdict. */
#define IPQ_QMAX_DEFAULT 1024
/* Name of the /proc/net status file created by this module. */
#define IPQ_PROC_FS_NAME "ip_queue"
/* Legacy sysctl binary ctl_name and procname for the queue-length knob. */
#define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
37
/*
 * One queued packet awaiting a verdict from userspace.  The entry's
 * kernel address doubles as its packet id on the netlink wire (see
 * ipq_build_packet_message / ipq_find_dequeue_entry).
 */
struct ipq_queue_entry {
        struct list_head list;  /* link in the global queue_list */
        struct nf_info *info;   /* netfilter reinjection context */
        struct sk_buff *skb;    /* the packet itself */
};

/* Predicate used by __ipq_flush() to select entries to drop. */
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
45
/* Module-global queue state; all of it (except ipqnl/ipqnl_mutex) is
 * protected by queue_lock. */
static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;  /* what to copy to userspace */
static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;  /* sysctl-tunable cap */
static DEFINE_RWLOCK(queue_lock);
static int peer_pid __read_mostly;              /* netlink pid of the single userspace peer */
static unsigned int copy_range __read_mostly;   /* max payload bytes copied in IPQ_COPY_PACKET mode */
static unsigned int queue_total;                /* current number of queued entries */
static unsigned int queue_dropped = 0;          /* drops due to a full queue */
static unsigned int queue_user_dropped = 0;     /* drops due to netlink_unicast failure */
static struct sock *ipqnl __read_mostly;        /* kernel-side netlink socket */
static LIST_HEAD(queue_list);                   /* FIFO of pending ipq_queue_entry */
static DEFINE_MUTEX(ipqnl_mutex);               /* serializes netlink receive processing */
57
/*
 * Hand the packet back to netfilter with the given verdict and free the
 * queue entry.  Must be called without queue_lock held: nf_reinject()
 * re-enters the stack, which may itself take locks.
 */
static void
ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
{
        /* TCP input path (and probably other bits) assume to be called
         * from softirq context, not from syscall, like ipq_issue_verdict is
         * called.  TCP input path deadlocks with locks taken from timer
         * softirq, e.g.  We therefore emulate this by local_bh_disable() */

        local_bh_disable();
        nf_reinject(entry->skb, entry->info, verdict);
        local_bh_enable();

        kfree(entry);
}
72
/* Append an entry to the pending queue.  Caller holds queue_lock as writer. */
static inline void
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
       list_add_tail(&entry->list, &queue_list);
       queue_total++;
}
79
80 static inline int
81 __ipq_set_mode(unsigned char mode, unsigned int range)
82 {
83         int status = 0;
84
85         switch(mode) {
86         case IPQ_COPY_NONE:
87         case IPQ_COPY_META:
88                 copy_mode = mode;
89                 copy_range = 0;
90                 break;
91
92         case IPQ_COPY_PACKET:
93                 copy_mode = mode;
94                 copy_range = range;
95                 if (copy_range > 0xFFFF)
96                         copy_range = 0xFFFF;
97                 break;
98
99         default:
100                 status = -EINVAL;
101
102         }
103         return status;
104 }
105
static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);

/*
 * Forget the userspace peer: clear its pid, stop packet timestamping,
 * fall back to IPQ_COPY_NONE and drop everything still queued.
 * Caller holds queue_lock as writer.
 */
static inline void
__ipq_reset(void)
{
        peer_pid = 0;
        net_disable_timestamp();
        __ipq_set_mode(IPQ_COPY_NONE, 0);
        __ipq_flush(NULL, 0);
}
116
117 static struct ipq_queue_entry *
118 ipq_find_dequeue_entry(unsigned long id)
119 {
120         struct ipq_queue_entry *entry = NULL, *i;
121
122         write_lock_bh(&queue_lock);
123
124         list_for_each_entry(i, &queue_list, list) {
125                 if ((unsigned long)i == id) {
126                         entry = i;
127                         break;
128                 }
129         }
130
131         if (entry) {
132                 list_del(&entry->list);
133                 queue_total--;
134         }
135
136         write_unlock_bh(&queue_lock);
137         return entry;
138 }
139
/*
 * Drop every queued entry matched by cmpfn (or all entries when cmpfn
 * is NULL), reinjecting each with NF_DROP.  Caller holds queue_lock as
 * writer; note ipq_issue_verdict() is called with the lock still held
 * here, unlike the verdict path.
 */
static void
__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
        struct ipq_queue_entry *entry, *next;

        /* _safe variant: entries are deleted while walking */
        list_for_each_entry_safe(entry, next, &queue_list, list) {
                if (!cmpfn || cmpfn(entry, data)) {
                        list_del(&entry->list);
                        queue_total--;
                        ipq_issue_verdict(entry, NF_DROP);
                }
        }
}
153
/* Locked wrapper around __ipq_flush() for callers outside queue_lock. */
static void
ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
{
        write_lock_bh(&queue_lock);
        __ipq_flush(cmpfn, data);
        write_unlock_bh(&queue_lock);
}
161
/*
 * Build the IPQM_PACKET netlink message describing a queued packet.
 * Returns a freshly allocated skb ready for netlink_unicast(), or NULL
 * with *errp set on failure.  The amount of payload copied depends on
 * the peer-configured copy_mode/copy_range, sampled under queue_lock.
 */
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
        sk_buff_data_t old_tail;
        size_t size = 0;
        size_t data_len = 0;
        struct sk_buff *skb;
        struct ipq_packet_msg *pmsg;
        struct nlmsghdr *nlh;
        struct timeval tv;

        read_lock_bh(&queue_lock);

        switch (copy_mode) {
        case IPQ_COPY_META:
        case IPQ_COPY_NONE:
                /* metadata only: no payload bytes follow the header */
                size = NLMSG_SPACE(sizeof(*pmsg));
                data_len = 0;
                break;

        case IPQ_COPY_PACKET:
                /* userspace sees raw bytes, so any deferred checksum
                 * must be completed first */
                if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
                     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
                    (*errp = skb_checksum_help(entry->skb))) {
                        read_unlock_bh(&queue_lock);
                        return NULL;
                }
                /* copy_range == 0 means "whole packet" */
                if (copy_range == 0 || copy_range > entry->skb->len)
                        data_len = entry->skb->len;
                else
                        data_len = copy_range;

                size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
                break;

        default:
                *errp = -EINVAL;
                read_unlock_bh(&queue_lock);
                return NULL;
        }

        read_unlock_bh(&queue_lock);

        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
                goto nlmsg_failure;

        old_tail = skb->tail;
        /* NLMSG_PUT jumps to nlmsg_failure if skb lacks tailroom */
        nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
        pmsg = NLMSG_DATA(nlh);
        memset(pmsg, 0, sizeof(*pmsg));

        /* the entry's address is the id userspace echoes back in its
         * verdict (matched in ipq_find_dequeue_entry) */
        pmsg->packet_id       = (unsigned long )entry;
        pmsg->data_len        = data_len;
        tv = ktime_to_timeval(entry->skb->tstamp);
        pmsg->timestamp_sec   = tv.tv_sec;
        pmsg->timestamp_usec  = tv.tv_usec;
        pmsg->mark            = entry->skb->mark;
        pmsg->hook            = entry->info->hook;
        pmsg->hw_protocol     = entry->skb->protocol;

        if (entry->info->indev)
                strcpy(pmsg->indev_name, entry->info->indev->name);
        else
                pmsg->indev_name[0] = '\0';

        if (entry->info->outdev)
                strcpy(pmsg->outdev_name, entry->info->outdev->name);
        else
                pmsg->outdev_name[0] = '\0';

        /* hardware header info is only meaningful on the input path */
        if (entry->info->indev && entry->skb->dev) {
                pmsg->hw_type = entry->skb->dev->type;
                pmsg->hw_addrlen = dev_parse_header(entry->skb,
                                                    pmsg->hw_addr);
        }

        if (data_len)
                if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
                        BUG();

        nlh->nlmsg_len = skb->tail - old_tail;
        return skb;

nlmsg_failure:
        if (skb)
                kfree_skb(skb);
        *errp = -EINVAL;
        printk(KERN_ERR "ip_queue: error creating packet message\n");
        return NULL;
}
253
/*
 * nf_queue handler entry point (see nfqh below): take ownership of a
 * packet, notify the userspace peer over netlink, and keep the entry
 * queued until a verdict arrives.  Returns 0/positive on success or a
 * negative errno; on error the caller keeps ownership of skb/info
 * (only our own entry and message skb are freed here).
 */
static int
ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
                   unsigned int queuenum)
{
        int status = -EINVAL;
        struct sk_buff *nskb;
        struct ipq_queue_entry *entry;

        if (copy_mode == IPQ_COPY_NONE)
                return -EAGAIN;

        entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        if (entry == NULL) {
                printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
                return -ENOMEM;
        }

        entry->info = info;
        entry->skb = skb;

        /* build the message before taking queue_lock; on success status
         * is left untouched (-EINVAL), which is also the error returned
         * if no peer is attached below */
        nskb = ipq_build_packet_message(entry, &status);
        if (nskb == NULL)
                goto err_out_free;

        write_lock_bh(&queue_lock);

        if (!peer_pid)
                goto err_out_free_nskb;

        if (queue_total >= queue_maxlen) {
                queue_dropped++;
                status = -ENOSPC;
                if (net_ratelimit())
                          printk (KERN_WARNING "ip_queue: full at %d entries, "
                                  "dropping packets(s). Dropped: %d\n", queue_total,
                                  queue_dropped);
                goto err_out_free_nskb;
        }

        /* netlink_unicast will either free the nskb or attach it to a socket */
        status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
        if (status < 0) {
                queue_user_dropped++;
                goto err_out_unlock;
        }

        /* peer has been told; park the entry until its verdict returns */
        __ipq_enqueue_entry(entry);

        write_unlock_bh(&queue_lock);
        return status;

err_out_free_nskb:
        kfree_skb(nskb);

err_out_unlock:
        write_unlock_bh(&queue_lock);

err_out_free:
        kfree(entry);
        return status;
}
315
/*
 * Replace the queued packet's contents with the (possibly resized)
 * payload userspace sent back alongside its verdict.  Returns 0 on
 * success or a negative errno; on error the caller drops the packet.
 */
static int
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
        int diff;
        int err;
        struct iphdr *user_iph = (struct iphdr *)v->payload;

        /* too short to even hold an IP header: silently keep original */
        if (v->data_len < sizeof(*user_iph))
                return 0;
        diff = v->data_len - e->skb->len;
        if (diff < 0) {
                /* replacement is shorter: shrink in place */
                if (pskb_trim(e->skb, v->data_len))
                        return -ENOMEM;
        } else if (diff > 0) {
                /* replacement is longer: bound it, then grow tailroom */
                if (v->data_len > 0xFFFF)
                        return -EINVAL;
                if (diff > skb_tailroom(e->skb)) {
                        err = pskb_expand_head(e->skb, 0,
                                               diff - skb_tailroom(e->skb),
                                               GFP_ATOMIC);
                        if (err) {
                                printk(KERN_WARNING "ip_queue: error "
                                      "in mangle, dropping packet: %d\n", -err);
                                return err;
                        }
                }
                skb_put(e->skb, diff);
        }
        if (!skb_make_writable(e->skb, v->data_len))
                return -ENOMEM;
        skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
        /* packet changed under us; any cached checksum is now stale */
        e->skb->ip_summed = CHECKSUM_NONE;

        return 0;
}
351
352 static int
353 ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
354 {
355         struct ipq_queue_entry *entry;
356
357         if (vmsg->value > NF_MAX_VERDICT)
358                 return -EINVAL;
359
360         entry = ipq_find_dequeue_entry(vmsg->id);
361         if (entry == NULL)
362                 return -ENOENT;
363         else {
364                 int verdict = vmsg->value;
365
366                 if (vmsg->data_len && vmsg->data_len == len)
367                         if (ipq_mangle_ipv4(vmsg, entry) < 0)
368                                 verdict = NF_DROP;
369
370                 ipq_issue_verdict(entry, verdict);
371                 return 0;
372         }
373 }
374
/* Locked wrapper around __ipq_set_mode() for the netlink control path. */
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
        int status;

        write_lock_bh(&queue_lock);
        status = __ipq_set_mode(mode, range);
        write_unlock_bh(&queue_lock);
        return status;
}
385
386 static int
387 ipq_receive_peer(struct ipq_peer_msg *pmsg,
388                  unsigned char type, unsigned int len)
389 {
390         int status = 0;
391
392         if (len < sizeof(*pmsg))
393                 return -EINVAL;
394
395         switch (type) {
396         case IPQM_MODE:
397                 status = ipq_set_mode(pmsg->msg.mode.value,
398                                       pmsg->msg.mode.range);
399                 break;
400
401         case IPQM_VERDICT:
402                 if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
403                         status = -EINVAL;
404                 else
405                         status = ipq_set_verdict(&pmsg->msg.verdict,
406                                                  len - sizeof(*pmsg));
407                         break;
408         default:
409                 status = -EINVAL;
410         }
411         return status;
412 }
413
414 static int
415 dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
416 {
417         if (entry->info->indev)
418                 if (entry->info->indev->ifindex == ifindex)
419                         return 1;
420         if (entry->info->outdev)
421                 if (entry->info->outdev->ifindex == ifindex)
422                         return 1;
423 #ifdef CONFIG_BRIDGE_NETFILTER
424         if (entry->skb->nf_bridge) {
425                 if (entry->skb->nf_bridge->physindev &&
426                     entry->skb->nf_bridge->physindev->ifindex == ifindex)
427                         return 1;
428                 if (entry->skb->nf_bridge->physoutdev &&
429                     entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
430                         return 1;
431         }
432 #endif
433         return 0;
434 }
435
/* Drop every queued packet that references the given interface index. */
static void
ipq_dev_drop(int ifindex)
{
        ipq_flush(dev_cmp, ifindex);
}
441
/* NACK the message and bail out of the *calling* function. */
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)

/*
 * Validate and process one netlink message from userspace: sanity-check
 * the header, enforce the single-peer policy (first pid to talk to us
 * wins), check CAP_NET_ADMIN, then hand the payload to
 * ipq_receive_peer().  Runs under ipqnl_mutex (see ipq_rcv_skb).
 */
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
        int status, type, pid, flags, nlmsglen, skblen;
        struct nlmsghdr *nlh;

        skblen = skb->len;
        if (skblen < sizeof(*nlh))
                return;

        nlh = nlmsg_hdr(skb);
        nlmsglen = nlh->nlmsg_len;
        if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
                return;

        pid = nlh->nlmsg_pid;
        flags = nlh->nlmsg_flags;

        /* must be a plain (non-multipart) request from a real pid */
        if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
                RCV_SKB_FAIL(-EINVAL);

        if (flags & MSG_TRUNC)
                RCV_SKB_FAIL(-ECOMM);

        type = nlh->nlmsg_type;
        if (type < NLMSG_NOOP || type >= IPQM_MAX)
                RCV_SKB_FAIL(-EINVAL);

        /* netlink control messages (<= IPQM_BASE) are silently ignored */
        if (type <= IPQM_BASE)
                return;

        if (security_netlink_recv(skb, CAP_NET_ADMIN))
                RCV_SKB_FAIL(-EPERM);

        write_lock_bh(&queue_lock);

        if (peer_pid) {
                /* only one userspace peer at a time */
                if (peer_pid != pid) {
                        write_unlock_bh(&queue_lock);
                        RCV_SKB_FAIL(-EBUSY);
                }
        } else {
                /* first peer attaches: start timestamping packets */
                net_enable_timestamp();
                peer_pid = pid;
        }

        write_unlock_bh(&queue_lock);

        status = ipq_receive_peer(NLMSG_DATA(nlh), type,
                                  nlmsglen - NLMSG_LENGTH(0));
        if (status < 0)
                RCV_SKB_FAIL(status);

        if (flags & NLM_F_ACK)
                netlink_ack(skb, nlh, 0);
        return;
}
501
/* Netlink input callback: serialize all peer messages via ipqnl_mutex. */
static void
ipq_rcv_skb(struct sk_buff *skb)
{
        mutex_lock(&ipqnl_mutex);
        __ipq_rcv_skb(skb);
        mutex_unlock(&ipqnl_mutex);
}
509
/*
 * Netdevice notifier: when a device in the initial namespace goes
 * down, drop any queued packets that still reference it so we never
 * reinject into a dead device.
 */
static int
ipq_rcv_dev_event(struct notifier_block *this,
                  unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        /* ip_queue only supports the initial network namespace */
        if (dev->nd_net != &init_net)
                return NOTIFY_DONE;

        /* Drop any packets associated with the downed device */
        if (event == NETDEV_DOWN)
                ipq_dev_drop(dev->ifindex);
        return NOTIFY_DONE;
}

static struct notifier_block ipq_dev_notifier = {
        .notifier_call  = ipq_rcv_dev_event,
};
528
/*
 * Netlink notifier: when our userspace peer's NETLINK_FIREWALL socket
 * is released, reset the module state (clears peer_pid and flushes the
 * queue) so a new peer may attach.
 */
static int
ipq_rcv_nl_event(struct notifier_block *this,
                 unsigned long event, void *ptr)
{
        struct netlink_notify *n = ptr;

        if (event == NETLINK_URELEASE &&
            n->protocol == NETLINK_FIREWALL && n->pid) {
                write_lock_bh(&queue_lock);
                /* only react to *our* peer in the initial namespace */
                if ((n->net == &init_net) && (n->pid == peer_pid))
                        __ipq_reset();
                write_unlock_bh(&queue_lock);
        }
        return NOTIFY_DONE;
}

static struct notifier_block ipq_nl_notifier = {
        .notifier_call  = ipq_rcv_nl_event,
};
548
static struct ctl_table_header *ipq_sysctl_header;

/* Leaf sysctl: net.ipv4.ip_queue_maxlen tunes the queue cap. */
static ctl_table ipq_table[] = {
        {
                .ctl_name       = NET_IPQ_QMAX,
                .procname       = NET_IPQ_QMAX_NAME,
                .data           = &queue_maxlen,
                .maxlen         = sizeof(queue_maxlen),
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
        { .ctl_name = 0 }
};

/* Intermediate directory: net.ipv4 */
static ctl_table ipq_dir_table[] = {
        {
                .ctl_name       = NET_IPV4,
                .procname       = "ipv4",
                .mode           = 0555,
                .child          = ipq_table
        },
        { .ctl_name = 0 }
};

/* Root of the registered tree: net */
static ctl_table ipq_root_table[] = {
        {
                .ctl_name       = CTL_NET,
                .procname       = "net",
                .mode           = 0555,
                .child          = ipq_dir_table
        },
        { .ctl_name = 0 }
};
582
/* seq_file show callback: dump current queue state under the read lock. */
static int ip_queue_show(struct seq_file *m, void *v)
{
        read_lock_bh(&queue_lock);

        seq_printf(m,
                      "Peer PID          : %d\n"
                      "Copy mode         : %hu\n"
                      "Copy range        : %u\n"
                      "Queue length      : %u\n"
                      "Queue max. length : %u\n"
                      "Queue dropped     : %u\n"
                      "Netlink dropped   : %u\n",
                      peer_pid,
                      copy_mode,
                      copy_range,
                      queue_total,
                      queue_maxlen,
                      queue_dropped,
                      queue_user_dropped);

        read_unlock_bh(&queue_lock);
        return 0;
}

/* open() handler for /proc/net/ip_queue. */
static int ip_queue_open(struct inode *inode, struct file *file)
{
        return single_open(file, ip_queue_show, NULL);
}
611
/* File operations for the /proc/net/ip_queue status file. */
static const struct file_operations ip_queue_proc_fops = {
        .open           = ip_queue_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
        .owner          = THIS_MODULE,
};

/* nf_queue handler registered for PF_INET in ip_queue_init(). */
static const struct nf_queue_handler nfqh = {
        .name   = "ip_queue",
        .outfn  = &ipq_enqueue_packet,
};
624
/*
 * Module init: register the netlink notifier and NETLINK_FIREWALL
 * socket, create the proc entry, hook the netdevice notifier and
 * sysctl tree, and finally register as the PF_INET queue handler.
 * Unwinds in reverse order via the cleanup_* labels on failure.
 */
static int __init ip_queue_init(void)
{
        int status = -ENOMEM;
        struct proc_dir_entry *proc;

        netlink_register_notifier(&ipq_nl_notifier);
        ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
                                      ipq_rcv_skb, NULL, THIS_MODULE);
        if (ipqnl == NULL) {
                printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
                goto cleanup_netlink_notifier;
        }

        proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
        if (proc) {
                proc->owner = THIS_MODULE;
                proc->proc_fops = &ip_queue_proc_fops;
        } else {
                printk(KERN_ERR "ip_queue: failed to create proc entry\n");
                goto cleanup_ipqnl;
        }

        /* NOTE(review): return values of these two registrations are
         * ignored here, matching the historical code — confirm that is
         * acceptable before changing */
        register_netdevice_notifier(&ipq_dev_notifier);
        ipq_sysctl_header = register_sysctl_table(ipq_root_table);

        status = nf_register_queue_handler(PF_INET, &nfqh);
        if (status < 0) {
                printk(KERN_ERR "ip_queue: failed to register queue handler\n");
                goto cleanup_sysctl;
        }
        return status;

cleanup_sysctl:
        unregister_sysctl_table(ipq_sysctl_header);
        unregister_netdevice_notifier(&ipq_dev_notifier);
        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
cleanup_ipqnl:
        sock_release(ipqnl->sk_socket);
        /* lock/unlock pair waits out any receive callback in flight */
        mutex_lock(&ipqnl_mutex);
        mutex_unlock(&ipqnl_mutex);

cleanup_netlink_notifier:
        netlink_unregister_notifier(&ipq_nl_notifier);
        return status;
}
670
/*
 * Module exit: tear down in the reverse order of ip_queue_init(),
 * first detaching from nf_queue and dropping anything still queued.
 */
static void __exit ip_queue_fini(void)
{
        nf_unregister_queue_handlers(&nfqh);
        synchronize_net();
        /* no new packets can arrive now; drop the stragglers */
        ipq_flush(NULL, 0);

        unregister_sysctl_table(ipq_sysctl_header);
        unregister_netdevice_notifier(&ipq_dev_notifier);
        proc_net_remove(&init_net, IPQ_PROC_FS_NAME);

        sock_release(ipqnl->sk_socket);
        /* lock/unlock pair waits out any receive callback in flight */
        mutex_lock(&ipqnl_mutex);
        mutex_unlock(&ipqnl_mutex);

        netlink_unregister_notifier(&ipq_nl_notifier);
}
687
MODULE_DESCRIPTION("IPv4 packet queue handler");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_LICENSE("GPL");

module_init(ip_queue_init);
module_exit(ip_queue_fini);