X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=net%2Fipv4%2Ftcp.c;h=071e83a894ad9c02081f24b9ba243c77f7ea8d7c;hb=5e226e4d9016daee170699f8a4188a5505021756;hp=56ed40703f98872c43837e3e00452f845d76e478;hpb=9c55e01c0cc835818475a6ce8c4d684df9949ac8;p=linux-2.6 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 56ed40703f..071e83a894 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -308,7 +308,7 @@ struct tcp_splice_state { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. - * All the sk_stream_mem_schedule() is of this nature: accounting + * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int tcp_memory_pressure __read_mostly; @@ -485,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk_charge_skb(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } @@ -496,7 +497,6 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, if (flags & MSG_OOB) { tp->urg_mode = 1; tp->snd_up = tp->write_seq; - TCP_SKB_CB(skb)->sacked |= TCPCB_URG; } } @@ -515,8 +515,8 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now, } } -int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, - unsigned int offset, size_t len) +static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, + unsigned int offset, size_t len) { struct tcp_splice_state *tss = rd_desc->arg.data; @@ -629,6 +629,31 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } +struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) +{ + struct sk_buff *skb; + + /* The TCP header must be at least 32-bit aligned. */ + size = ALIGN(size, 4); + + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); + if (skb) { + if (sk_wmem_schedule(sk, skb->truesize)) { + /* + * Make sure that we have exactly size bytes + * available to the caller, no more, no less. + */ + skb_reserve(skb, skb_tailroom(skb) - size); + return skb; + } + __kfree_skb(skb); + } else { + sk->sk_prot->enter_memory_pressure(); + sk_stream_moderate_sndbuf(sk); + } + return NULL; +} + static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) { @@ -665,8 +690,7 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_pskb(sk, 0, 0, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -683,7 +707,7 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (can_coalesce) { @@ -697,7 +721,7 @@ new_segment: skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; - sk->sk_forward_alloc -= copy; + sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -846,8 +870,8 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_pskb(sk, select_size(sk), - 0, sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, select_size(sk), + sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -904,7 +928,7 @@ new_segment: if (copy > PAGE_SIZE - off) copy = PAGE_SIZE - off; - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (!page) { @@ -995,7 +1019,7 @@ do_fault: * reset, where we can be unlinking the send_head. */ tcp_check_send_head(sk, skb); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } do_error: @@ -1628,6 +1652,41 @@ recv_urg: goto out; } +void tcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + TCP_INC_STATS(TCP_MIB_CURRESTAB); + break; + + case TCP_CLOSE: + if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) + TCP_INC_STATS(TCP_MIB_ESTABRESETS); + + sk->sk_prot->unhash(sk); + if (inet_csk(sk)->icsk_bind_hash && + !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + inet_put_port(sk); + /* fall through */ + default: + if (oldstate==TCP_ESTABLISHED) + TCP_DEC_STATS(TCP_MIB_CURRESTAB); + } + + /* Change state AFTER socket is unhashed to avoid closed + * socket sitting in hash tables. + */ + sk->sk_state = state; + +#ifdef STATE_TRACE + SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]); +#endif +} +EXPORT_SYMBOL_GPL(tcp_set_state); + /* * State processing on a close. This implements the state shift for * sending our FIN frame. Note that we only send a FIN for some @@ -1714,7 +1773,7 @@ void tcp_close(struct sock *sk, long timeout) __kfree_skb(skb); } - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. To witness the awful effects of the old behavior of @@ -1817,7 +1876,7 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (tcp_too_many_orphans(sk, atomic_read(sk->sk_prot->orphan_count))) { if (net_ratelimit()) @@ -2539,7 +2598,6 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); -extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; @@ -2558,9 +2616,7 @@ void __init tcp_init(void) unsigned long limit; int order, i, max_share; - if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb)) - __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), - sizeof(skb->cb)); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", @@ -2637,11 +2693,11 @@ void __init tcp_init(void) limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; sysctl_tcp_wmem[2] = max(64*1024, max_share); - sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share);