err.no Git - linux-2.6/blob - include/net/tcp.h

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Definitions for the TCP module.
   7  *
   8  * Version:     @(#)tcp.h       1.0.5   05/23/93
   9  *
  10  * Authors:     Ross Biro
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *
  13  *              This program is free software; you can redistribute it and/or
  14  *              modify it under the terms of the GNU General Public License
  15  *              as published by the Free Software Foundation; either version
  16  *              2 of the License, or (at your option) any later version.
  17  */
  18 #ifndef _TCP_H
  19 #define _TCP_H
  20
  21 #define TCP_DEBUG 1
  22 #define INET_CSK_DEBUG 1
  23 #define FASTRETRANS_DEBUG 1
  24
  25 /* Cancel timers, when they are not required. */
  26 #undef INET_CSK_CLEAR_TIMERS
  27
  28 #include <linux/config.h>
  29 #include <linux/list.h>
  30 #include <linux/tcp.h>
  31 #include <linux/slab.h>
  32 #include <linux/cache.h>
  33 #include <linux/percpu.h>
  34 #include <net/inet_hashtables.h>
  35 #include <net/checksum.h>
  36 #include <net/request_sock.h>
  37 #include <net/sock.h>
  38 #include <net/snmp.h>
  39 #include <net/ip.h>
  40 #include <net/tcp_states.h>
  41
  42 #include <linux/seq_file.h>
  43
  44 extern struct inet_hashinfo tcp_hashinfo;
  45
  46 extern atomic_t tcp_orphan_count;
  47 extern int tcp_tw_count;
  48 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
  49 extern void tcp_tw_deschedule(struct inet_timewait_sock *tw);
  50
  51 #define MAX_TCP_HEADER  (128 + MAX_HEADER)
  52
  53 /*
  54  * Never offer a window over 32767 without using window scaling. Some
  55  * poor stacks do signed 16bit maths!
  56  */
  57 #define MAX_TCP_WINDOW          32767U
  58
  59 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
  60 #define TCP_MIN_MSS             88U
  61
  62 /* Minimal RCV_MSS. */
  63 #define TCP_MIN_RCVMSS          536U
  64
  65 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
  66 #define TCP_FASTRETRANS_THRESH 3
  67
  68 /* Maximal reordering. */
  69 #define TCP_MAX_REORDERING      127
  70
  71 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
  72 #define TCP_MAX_QUICKACKS       16U
  73
  74 /* urg_data states */
  75 #define TCP_URG_VALID   0x0100
  76 #define TCP_URG_NOTYET  0x0200
  77 #define TCP_URG_READ    0x0400
  78
  79 #define TCP_RETR1       3       /*
  80                                  * This is how many retries it does before it
  81                                  * tries to figure out if the gateway is
  82                                  * down. Minimal RFC value is 3; it corresponds
  83                                  * to ~3sec-8min depending on RTO.
  84                                  */
  85
  86 #define TCP_RETR2       15      /*
  87                                  * This should take at least
  88                                  * 90 minutes to time out.
  89                                  * RFC1122 says that the limit is 100 sec.
  90                                  * 15 is ~13-30min depending on RTO.
  91                                  */
  92
  93 #define TCP_SYN_RETRIES  5      /* number of times to retry active opening a
  94                                  * connection: ~180sec is RFC minimum   */
  95
  96 #define TCP_SYNACK_RETRIES 5    /* number of times to retry passive opening a
  97                                  * connection: ~180sec is RFC minimum   */
  98
  99
 100 #define TCP_ORPHAN_RETRIES 7    /* number of times to retry on an orphaned
 101                                  * socket. 7 is ~50sec-16min.
 102                                  */
 103
 104
 105 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
 106                                   * state, about 60 seconds     */
 107 #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
 108                                  /* BSD style FIN_WAIT2 deadlock breaker.
 109                                   * It used to be 3min, new value is 60sec,
 110                                   * to combine FIN-WAIT-2 timeout with
 111                                   * TIME-WAIT timer.
 112                                   */
 113
 114 #define TCP_DELACK_MAX  ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
 115 #if HZ >= 100
 116 #define TCP_DELACK_MIN  ((unsigned)(HZ/25))     /* minimal time to delay before sending an ACK */
 117 #define TCP_ATO_MIN     ((unsigned)(HZ/25))
 118 #else
 119 #define TCP_DELACK_MIN  4U
 120 #define TCP_ATO_MIN     4U
 121 #endif
 122 #define TCP_RTO_MAX     ((unsigned)(120*HZ))
 123 #define TCP_RTO_MIN     ((unsigned)(HZ/5))
 124 #define TCP_TIMEOUT_INIT ((unsigned)(3*HZ))     /* RFC 1122 initial RTO value   */
 125
 126 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 127                                                          * for local resources.
 128                                                          */
 129
 130 #define TCP_KEEPALIVE_TIME      (120*60*HZ)     /* two hours */
 131 #define TCP_KEEPALIVE_PROBES    9               /* Max of 9 keepalive probes    */
 132 #define TCP_KEEPALIVE_INTVL     (75*HZ)
 133
 134 #define MAX_TCP_KEEPIDLE        32767
 135 #define MAX_TCP_KEEPINTVL       32767
 136 #define MAX_TCP_KEEPCNT         127
 137 #define MAX_TCP_SYNCNT          127
 138
 139 #define TCP_SYNQ_INTERVAL       (HZ/5)  /* Period of SYNACK timer */
 140 #define TCP_SYNQ_HSIZE          512     /* Size of SYNACK hash table */
 141
 142 #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
 143 #define TCP_PAWS_MSL    60              /* Per-host timestamps are invalidated
 144                                          * after this time. It should be equal
 145                                          * (or greater than) TCP_TIMEWAIT_LEN
 146                                          * to provide reliability equal to one
 147                                          * provided by timewait state.
 148                                          */
 149 #define TCP_PAWS_WINDOW 1               /* Replay window for per-host
 150                                          * timestamps. It must be less than
 151                                          * minimal timewait lifetime.
 152                                          */
 153
 154 #define TCP_TW_RECYCLE_SLOTS_LOG        5
 155 #define TCP_TW_RECYCLE_SLOTS            (1<<TCP_TW_RECYCLE_SLOTS_LOG)
 156
 157 /* If time > 4sec, it is "slow" path, no recycling is required,
 158    so that we select tick to get range about 4 seconds.
 159  */
 160
 161 #if HZ <= 16 || HZ > 4096
 162 # error Unsupported: HZ <= 16 or HZ > 4096
 163 #elif HZ <= 32
 164 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
 165 #elif HZ <= 64
 166 # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
 167 #elif HZ <= 128
 168 # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
 169 #elif HZ <= 256
 170 # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
 171 #elif HZ <= 512
 172 # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
 173 #elif HZ <= 1024
 174 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
 175 #elif HZ <= 2048
 176 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
 177 #else
 178 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
 179 #endif
 180 /*
 181  *      TCP option
 182  */
 183
 184 #define TCPOPT_NOP              1       /* Padding */
 185 #define TCPOPT_EOL              0       /* End of options */
 186 #define TCPOPT_MSS              2       /* Segment size negotiating */
 187 #define TCPOPT_WINDOW           3       /* Window scaling */
 188 #define TCPOPT_SACK_PERM        4       /* SACK Permitted */
 189 #define TCPOPT_SACK             5       /* SACK Block */
 190 #define TCPOPT_TIMESTAMP        8       /* Better RTT estimations/PAWS */
 191
 192 /*
 193  *     TCP option lengths
 194  */
 195
 196 #define TCPOLEN_MSS            4
 197 #define TCPOLEN_WINDOW         3
 198 #define TCPOLEN_SACK_PERM      2
 199 #define TCPOLEN_TIMESTAMP      10
 200
 201 /* But this is what stacks really send out. */
 202 #define TCPOLEN_TSTAMP_ALIGNED          12
 203 #define TCPOLEN_WSCALE_ALIGNED          4
 204 #define TCPOLEN_SACKPERM_ALIGNED        4
 205 #define TCPOLEN_SACK_BASE               2
 206 #define TCPOLEN_SACK_BASE_ALIGNED       4
 207 #define TCPOLEN_SACK_PERBLOCK           8
 208
 209 #define ICSK_TIME_RETRANS       1       /* Retransmit timer */
 210 #define ICSK_TIME_DACK          2       /* Delayed ack timer */
 211 #define ICSK_TIME_PROBE0        3       /* Zero window probe timer */
 212 #define ICSK_TIME_KEEPOPEN      4       /* Keepalive timer */
 213
 214 /* Flags in tp->nonagle */
 215 #define TCP_NAGLE_OFF           1       /* Nagle's algo is disabled */
 216 #define TCP_NAGLE_CORK          2       /* Socket is corked         */
 217 #define TCP_NAGLE_PUSH          4       /* Cork is overridden for already queued data */
 218
 219 /* sysctl variables for tcp */
 220 extern int sysctl_tcp_timestamps;
 221 extern int sysctl_tcp_window_scaling;
 222 extern int sysctl_tcp_sack;
 223 extern int sysctl_tcp_fin_timeout;
 224 extern int sysctl_tcp_tw_recycle;
 225 extern int sysctl_tcp_keepalive_time;
 226 extern int sysctl_tcp_keepalive_probes;
 227 extern int sysctl_tcp_keepalive_intvl;
 228 extern int sysctl_tcp_syn_retries;
 229 extern int sysctl_tcp_synack_retries;
 230 extern int sysctl_tcp_retries1;
 231 extern int sysctl_tcp_retries2;
 232 extern int sysctl_tcp_orphan_retries;
 233 extern int sysctl_tcp_syncookies;
 234 extern int sysctl_tcp_retrans_collapse;
 235 extern int sysctl_tcp_stdurg;
 236 extern int sysctl_tcp_rfc1337;
 237 extern int sysctl_tcp_abort_on_overflow;
 238 extern int sysctl_tcp_max_orphans;
 239 extern int sysctl_tcp_max_tw_buckets;
 240 extern int sysctl_tcp_fack;
 241 extern int sysctl_tcp_reordering;
 242 extern int sysctl_tcp_ecn;
 243 extern int sysctl_tcp_dsack;
 244 extern int sysctl_tcp_mem[3];
 245 extern int sysctl_tcp_wmem[3];
 246 extern int sysctl_tcp_rmem[3];
 247 extern int sysctl_tcp_app_win;
 248 extern int sysctl_tcp_adv_win_scale;
 249 extern int sysctl_tcp_tw_reuse;
 250 extern int sysctl_tcp_frto;
 251 extern int sysctl_tcp_low_latency;
 252 extern int sysctl_tcp_nometrics_save;
 253 extern int sysctl_tcp_moderate_rcvbuf;
 254 extern int sysctl_tcp_tso_win_divisor;
 255
 256 extern atomic_t tcp_memory_allocated;
 257 extern atomic_t tcp_sockets_allocated;
 258 extern int tcp_memory_pressure;
 259
 260 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 261 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
 262 #else
 263 #define AF_INET_FAMILY(fam) 1
 264 #endif
 265
 266 /*
 267  *      Pointers to address related TCP functions
 268  *      (i.e. things that depend on the address family)
 269  */
 270
 271 struct tcp_func {
             /*
              * Table of address-family dependent hooks plus two size fields.
              * Several members take the same parameter lists as helpers
              * declared later in this header: send_check/tcp_v4_send_check(),
              * conn_request/tcp_v4_conn_request(),
              * syn_recv_sock/tcp_v4_syn_recv_sock(),
              * remember_stamp/tcp_v4_remember_stamp(), and
              * setsockopt/getsockopt match tcp_setsockopt()/tcp_getsockopt().
              * NOTE(review): which functions are actually installed in each
              * slot is decided outside this header - confirm at the
              * definition sites.
              */
 272         int                     (*queue_xmit)           (struct sk_buff *skb,
 273                                                          int ipfragok);
 274
 275         void                    (*send_check)           (struct sock *sk,
 276                                                          struct tcphdr *th,
 277                                                          int len,
 278                                                          struct sk_buff *skb);
 279
 280         int                     (*rebuild_header)       (struct sock *sk);
 281
 282         int                     (*conn_request)         (struct sock *sk,
 283                                                          struct sk_buff *skb);
 284
 285         struct sock *           (*syn_recv_sock)        (struct sock *sk,
 286                                                          struct sk_buff *skb,
 287                                                          struct request_sock *req,
 288                                                          struct dst_entry *dst);
 289
 290         int                     (*remember_stamp)       (struct sock *sk);
 291
             /* NOTE(review): presumably the network-layer header size in bytes - confirm. */
 292         __u16                   net_header_len;
 293
 294         int                     (*setsockopt)           (struct sock *sk,
 295                                                          int level,
 296                                                          int optname,
 297                                                          char __user *optval,
 298                                                          int optlen);
 299
 300         int                     (*getsockopt)           (struct sock *sk,
 301                                                          int level,
 302                                                          int optname,
 303                                                          char __user *optval,
 304                                                          int __user *optlen);
 305
 306
 307         void                    (*addr2sockaddr)        (struct sock *sk,
 308                                                          struct sockaddr *);
 309
             /* NOTE(review): presumably the size of the sockaddr filled in by addr2sockaddr - confirm. */
 310         int sockaddr_len;
 311 };
 312
 313 /*
 314  * The next routines deal with comparing 32 bit unsigned ints
 315  * and worry about wraparound (automatic with unsigned arithmetic).
 316  */
 317
 /*
  * Return nonzero when seq1 comes strictly before seq2 in the 32-bit
  * sequence space.  The unsigned difference wraps naturally; viewing it
  * as a signed value turns "more than half the space behind" into a
  * negative number.
  */
 static inline int before(__u32 seq1, __u32 seq2)
 {
         const __s32 delta = (__s32)(seq1 - seq2);

         return delta < 0;
 }
 322
 /*
  * Return nonzero when seq1 comes strictly after seq2 in the 32-bit
  * sequence space (wraparound-safe; mirror image of before()).
  */
 static inline int after(__u32 seq1, __u32 seq2)
 {
         const __s32 delta = (__s32)(seq2 - seq1);

         return delta < 0;
 }
 327
 328
 /*
  * Return nonzero when seq2 <= seq1 <= seq3, evaluated modulo 2^32.
  * Both distances are measured forward from seq2 with unsigned
  * arithmetic, so the test keeps working across sequence wraparound.
  */
 static inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
 {
         const __u32 span = seq3 - seq2;
         const __u32 offset = seq1 - seq2;

         return offset <= span;
 }
 334
 335
 336 extern struct proto tcp_prot;
 337
 338 DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 339 #define TCP_INC_STATS(field)            SNMP_INC_STATS(tcp_statistics, field)
 340 #define TCP_INC_STATS_BH(field)         SNMP_INC_STATS_BH(tcp_statistics, field)
 341 #define TCP_INC_STATS_USER(field)       SNMP_INC_STATS_USER(tcp_statistics, field)
 342 #define TCP_DEC_STATS(field)            SNMP_DEC_STATS(tcp_statistics, field)
 343 #define TCP_ADD_STATS_BH(field, val)    SNMP_ADD_STATS_BH(tcp_statistics, field, val)
 344 #define TCP_ADD_STATS_USER(field, val)  SNMP_ADD_STATS_USER(tcp_statistics, field, val)
 345
 346 extern void                     tcp_v4_err(struct sk_buff *skb, u32);
 347
 348 extern void                     tcp_shutdown (struct sock *sk, int how);
 349
 350 extern int                      tcp_v4_rcv(struct sk_buff *skb);
 351
 352 extern int                      tcp_v4_remember_stamp(struct sock *sk);
 353
 354 extern int                      tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 355
 356 extern int                      tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
 357                                             struct msghdr *msg, size_t size);
 358 extern ssize_t                  tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
 359
 360 extern int                      tcp_ioctl(struct sock *sk,
 361                                           int cmd,
 362                                           unsigned long arg);
 363
 364 extern int                      tcp_rcv_state_process(struct sock *sk,
 365                                                       struct sk_buff *skb,
 366                                                       struct tcphdr *th,
 367                                                       unsigned len);
 368
 369 extern int                      tcp_rcv_established(struct sock *sk,
 370                                                     struct sk_buff *skb,
 371                                                     struct tcphdr *th,
 372                                                     unsigned len);
 373
 374 extern void                     tcp_rcv_space_adjust(struct sock *sk);
 375
 376 enum inet_csk_ack_state_t {
             /* Distinct bit values that are OR'ed into icsk_ack.pending. */
 377         ICSK_ACK_SCHED  = 1,    /* set by inet_csk_schedule_ack(), tested by inet_csk_ack_scheduled() */
 378         ICSK_ACK_TIMER  = 2,    /* set by inet_csk_reset_xmit_timer() when arming ICSK_TIME_DACK */
 379         ICSK_ACK_PUSHED = 4     /* NOTE(review): not used in the visible part of this header - see users elsewhere */
 380 };
 381
 382 static inline void inet_csk_schedule_ack(struct sock *sk)
 383 {
 384         inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
 385 }
 386
 387 static inline int inet_csk_ack_scheduled(const struct sock *sk)
 388 {
 389         return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
 390 }
 391
 392 static inline void tcp_dec_quickack_mode(struct sock *sk,
 393                                          const unsigned int pkts)
 394 {
 395         struct inet_connection_sock *icsk = inet_csk(sk);
 396
 397         if (icsk->icsk_ack.quick) {
 398                 if (pkts >= icsk->icsk_ack.quick) {
 399                         icsk->icsk_ack.quick = 0;
 400                         /* Leaving quickack mode we deflate ATO. */
 401                         icsk->icsk_ack.ato   = TCP_ATO_MIN;
 402                 } else
 403                         icsk->icsk_ack.quick -= pkts;
 404         }
 405 }
 406
 407 extern void tcp_enter_quickack_mode(struct sock *sk);
 408
 409 static inline void inet_csk_delack_init(struct sock *sk)
 410 {
 411         memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
 412 }
 413
 414 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 415 {
 416         rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
 417 }
 418
 419 enum tcp_tw_status
 420 {
             /*
              * Result codes returned by tcp_timewait_state_process(), which is
              * declared right after this enum.
              * NOTE(review): the names suggest which reaction the caller should
              * take (nothing / RST / ACK / treat as new SYN) - confirm against
              * the implementation, which is not part of this header.
              */
 421         TCP_TW_SUCCESS = 0,
 422         TCP_TW_RST = 1,
 423         TCP_TW_ACK = 2,
 424         TCP_TW_SYN = 3
 425 };
 426
 427
 428 extern enum tcp_tw_status       tcp_timewait_state_process(struct inet_timewait_sock *tw,
 429                                                            struct sk_buff *skb,
 430                                                            const struct tcphdr *th);
 431
 432 extern struct sock *            tcp_check_req(struct sock *sk,struct sk_buff *skb,
 433                                               struct request_sock *req,
 434                                               struct request_sock **prev);
 435 extern int                      tcp_child_process(struct sock *parent,
 436                                                   struct sock *child,
 437                                                   struct sk_buff *skb);
 438 extern void                     tcp_enter_frto(struct sock *sk);
 439 extern void                     tcp_enter_loss(struct sock *sk, int how);
 440 extern void                     tcp_clear_retrans(struct tcp_sock *tp);
 441 extern void                     tcp_update_metrics(struct sock *sk);
 442
 443 extern void                     tcp_close(struct sock *sk,
 444                                           long timeout);
 445 extern struct sock *            inet_csk_accept(struct sock *sk, int flags, int *err);
 446 extern unsigned int             tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
 447
 448 extern int                      tcp_getsockopt(struct sock *sk, int level,
 449                                                int optname,
 450                                                char __user *optval,
 451                                                int __user *optlen);
 452 extern int                      tcp_setsockopt(struct sock *sk, int level,
 453                                                int optname, char __user *optval,
 454                                                int optlen);
 455 extern void                     tcp_set_keepalive(struct sock *sk, int val);
 456 extern int                      tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 457                                             struct msghdr *msg,
 458                                             size_t len, int nonblock,
 459                                             int flags, int *addr_len);
 460
 461 extern int                      tcp_listen_start(struct sock *sk);
 462
 463 extern void                     tcp_parse_options(struct sk_buff *skb,
 464                                                   struct tcp_options_received *opt_rx,
 465                                                   int estab);
 466
 467 /*
 468  *      TCP v4 functions exported for the inet6 API
 469  */
 470
 471 extern void                     tcp_v4_send_check(struct sock *sk,
 472                                                   struct tcphdr *th, int len,
 473                                                   struct sk_buff *skb);
 474
 475 extern int                      tcp_v4_conn_request(struct sock *sk,
 476                                                     struct sk_buff *skb);
 477
 478 extern struct sock *            tcp_create_openreq_child(struct sock *sk,
 479                                                          struct request_sock *req,
 480                                                          struct sk_buff *skb);
 481
 482 extern struct sock *            tcp_v4_syn_recv_sock(struct sock *sk,
 483                                                      struct sk_buff *skb,
 484                                                      struct request_sock *req,
 485                                                         struct dst_entry *dst);
 486
 487 extern int                      tcp_v4_do_rcv(struct sock *sk,
 488                                               struct sk_buff *skb);
 489
 490 extern int                      tcp_v4_connect(struct sock *sk,
 491                                                struct sockaddr *uaddr,
 492                                                int addr_len);
 493
 494 extern int                      tcp_connect(struct sock *sk);
 495
 496 extern struct sk_buff *         tcp_make_synack(struct sock *sk,
 497                                                 struct dst_entry *dst,
 498                                                 struct request_sock *req);
 499
 500 extern int                      tcp_disconnect(struct sock *sk, int flags);
 501
 502 extern void                     tcp_unhash(struct sock *sk);
 503
 504 extern int                      tcp_v4_hash_connecting(struct sock *sk);
 505
 506
 507 /* From syncookies.c */
 508 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 509                                     struct ip_options *opt);
 510 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 511                                      __u16 *mss);
 512
 513 /* tcp_output.c */
 514
 515 extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 516                                       unsigned int cur_mss, int nonagle);
 517 extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
 518 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 519 extern void tcp_xmit_retransmit_queue(struct sock *);
 520 extern void tcp_simple_retransmit(struct sock *);
 521 extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
 522
 523 extern void tcp_send_probe0(struct sock *);
 524 extern void tcp_send_partial(struct sock *);
 525 extern int  tcp_write_wakeup(struct sock *);
 526 extern void tcp_send_fin(struct sock *sk);
 527 extern void tcp_send_active_reset(struct sock *sk,
 528                                   unsigned int __nocast priority);
 529 extern int  tcp_send_synack(struct sock *);
 530 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 531 extern void tcp_send_ack(struct sock *sk);
 532 extern void tcp_send_delayed_ack(struct sock *sk);
 533
 534 /* tcp_input.c */
 535 extern void tcp_cwnd_application_limited(struct sock *sk);
 536
 537 /* tcp_timer.c */
 538 extern void tcp_init_xmit_timers(struct sock *);
 539 static inline void tcp_clear_xmit_timers(struct sock *sk)
 540 {
             /* TCP-named wrapper: forwards directly to inet_csk_clear_xmit_timers(). */
 541         inet_csk_clear_xmit_timers(sk);
 542 }
 543
 544 extern void inet_csk_delete_keepalive_timer(struct sock *sk);
 545 extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
 546 extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
 547 extern unsigned int tcp_current_mss(struct sock *sk, int large);
 548
 549 #ifdef INET_CSK_DEBUG
 550 extern const char inet_csk_timer_bug_msg[];
 551 #endif
 552
 553 /* tcp_diag.c */
 554 extern void tcp_get_info(struct sock *, struct tcp_info *);
 555
 556 /* Read 'sendfile()'-style from a TCP socket */
 557 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 558                                 unsigned int, size_t);
 559 extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 560                          sk_read_actor_t recv_actor);
 561
 562 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 563 {
 564         struct inet_connection_sock *icsk = inet_csk(sk);
 565
 566         if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
 567                 icsk->icsk_pending = 0;
 568 #ifdef INET_CSK_CLEAR_TIMERS
 569                 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
 570 #endif
 571         } else if (what == ICSK_TIME_DACK) {
 572                 icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
 573 #ifdef INET_CSK_CLEAR_TIMERS
 574                 sk_stop_timer(sk, &icsk->icsk_delack_timer);
 575 #endif
 576         }
 577 #ifdef INET_CSK_DEBUG
 578         else {
 579                 pr_debug(inet_csk_timer_bug_msg);
 580         }
 581 #endif
 582 }
 583
 584 /*
 585  *      Reset the retransmission timer
 586  */
 587 static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 588                                              unsigned long when)
 589 {
 590         struct inet_connection_sock *icsk = inet_csk(sk);
 591
 592         if (when > TCP_RTO_MAX) {
 593 #ifdef INET_CSK_DEBUG
 594                 pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
 595                          sk, what, when, current_text_addr());
 596 #endif
 597                 when = TCP_RTO_MAX;
 598         }
 599
 600         if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
 601                 icsk->icsk_pending = what;
 602                 icsk->icsk_timeout = jiffies + when;
 603                 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
 604         } else if (what == ICSK_TIME_DACK) {
 605                 icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
 606                 icsk->icsk_ack.timeout = jiffies + when;
 607                 sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
 608         }
 609 #ifdef INET_CSK_DEBUG
 610         else {
 611                 pr_debug(inet_csk_timer_bug_msg);
 612         }
 613 #endif
 614 }
 615
 616 /* Initialize RCV_MSS value.
 617  * RCV_MSS is an our guess about MSS used by the peer.
 618  * We haven't any direct information about the MSS.
 619  * It's better to underestimate the RCV_MSS rather than overestimate.
 620  * Overestimations make us ACKing less frequently than needed.
 621  * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss().
 622  */
 623
 624 static inline void tcp_initialize_rcv_mss(struct sock *sk)
 625 {
 626         struct tcp_sock *tp = tcp_sk(sk);
 627         unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 628
 629         hint = min(hint, tp->rcv_wnd/2);
 630         hint = min(hint, TCP_MIN_RCVMSS);
 631         hint = max(hint, TCP_MIN_MSS);
 632
 633         inet_csk(sk)->icsk_ack.rcv_mss = hint;
 634 }
 635
 636 static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 637 {
 638         tp->pred_flags = htonl((tp->tcp_header_len << 26) |
 639                                ntohl(TCP_FLAG_ACK) |
 640                                snd_wnd);
 641 }
 642
 643 static __inline__ void tcp_fast_path_on(struct tcp_sock *tp)
 644 {
 645         __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 646 }
 647
 648 static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
 649 {
 650         if (skb_queue_empty(&tp->out_of_order_queue) &&
 651             tp->rcv_wnd &&
 652             atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
 653             !tp->urg_data)
 654                 tcp_fast_path_on(tp);
 655 }
 656
 657 /* Compute the actual receive window we are currently advertising.
 658  * Rcv_nxt can be after the window if our peer push more data
 659  * than the offered window.
 660  */
 661 static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp)
 662 {
 663         s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
 664
 665         if (win < 0)
 666                 win = 0;
 667         return (u32) win;
 668 }
 669
 670 /* Choose a new window, without checks for shrinking, and without
 671  * scaling applied to the result.  The caller does these things
 672  * if necessary.  This is a "raw" window selection.
 673  */
 674 extern u32      __tcp_select_window(struct sock *sk);
 675
 676 /* TCP timestamps are only 32 bits wide; this causes a slight
 677  * complication on 64-bit systems since we store a snapshot
 678  * of jiffies in the buffer control blocks below.  We deliberately
 679  * use only the low 32 bits of jiffies and hide the ugly
 680  * casts with the following macro.
 681  */
 682 #define tcp_time_stamp          ((__u32)(jiffies))
 683
 684 /* This is what the send packet queueing engine uses to pass
 685  * TCP per-packet control information to the transmission
 686  * code.  We also store the host-order sequence numbers in
 687  * here too.  This is 36 bytes on 32-bit architectures,
 688  * 40 bytes on 64-bit machines, if this grows please adjust
 689  * skbuff.h:skbuff->cb[xxx] size appropriately.
 690  */
 691 struct tcp_skb_cb {
 692         union {
 693                 struct inet_skb_parm    h4;
 694 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 695                 struct inet6_skb_parm   h6;
 696 #endif
 697         } header;       /* For incoming frames          */
 698         __u32           seq;            /* Starting sequence number     */
 699         __u32           end_seq;        /* SEQ + FIN + SYN + datalen    */
 700         __u32           when;           /* used to compute rtt's        */
 701         __u8            flags;          /* TCP header flags.            */
 702
 703         /* NOTE: These must match up to the flags byte in a
 704          *       real TCP header.
 705          *       (The macros are written inside the struct body only to
 706          *       sit next to the field they describe; being preprocessor
 707          *       definitions they are visible file-wide.)
 705          */
 706 #define TCPCB_FLAG_FIN          0x01
 707 #define TCPCB_FLAG_SYN          0x02
 708 #define TCPCB_FLAG_RST          0x04
 709 #define TCPCB_FLAG_PSH          0x08
 710 #define TCPCB_FLAG_ACK          0x10
 711 #define TCPCB_FLAG_URG          0x20
 712 #define TCPCB_FLAG_ECE          0x40
 713 #define TCPCB_FLAG_CWR          0x80
 714
 715         __u8            sacked;         /* State flags for SACK/FACK.   */
 716 #define TCPCB_SACKED_ACKED      0x01    /* SKB ACK'd by a SACK block    */
 717 #define TCPCB_SACKED_RETRANS    0x02    /* SKB retransmitted            */
 718 #define TCPCB_LOST              0x04    /* SKB is lost                  */
 719 #define TCPCB_TAGBITS           0x07    /* All tag bits (OR of the three above) */
 720
 721 #define TCPCB_EVER_RETRANS      0x80    /* Ever retransmitted frame     */
 722 #define TCPCB_RETRANS           (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
 723
 724 #define TCPCB_URG               0x20    /* Urgent pointer advanced here */
 725
 726 #define TCPCB_AT_TAIL           (TCPCB_URG)
 727
 728         __u16           urg_ptr;        /* Valid when the URG flag is set. */
 729         __u32           ack_seq;        /* Sequence number ACK'd        */
 730 };
 731
 732 #define TCP_SKB_CB(__skb)       ((struct tcp_skb_cb *)&((__skb)->cb[0]))
 733
 734 #include <net/tcp_ecn.h>
 735
 736 /* Due to TSO, an SKB can be composed of multiple actual
 737  * packets.  To keep these tracked properly, we use this.
 738  */
 739 static inline int tcp_skb_pcount(const struct sk_buff *skb)
 740 {
 741         return skb_shinfo(skb)->tso_segs;
 742 }
 743
 744 /* This is valid iff tcp_skb_pcount() > 1. */
 745 static inline int tcp_skb_mss(const struct sk_buff *skb)
 746 {
 747         return skb_shinfo(skb)->tso_size;
 748 }
 749
 750 static inline void tcp_dec_pcount_approx(__u32 *count,
 751                                          const struct sk_buff *skb)
 752 {
 753         if (*count) {
 754                 *count -= tcp_skb_pcount(skb);
 755                 if ((int)*count < 0)
 756                         *count = 0;
 757         }
 758 }
 759
 760 static inline void tcp_packets_out_inc(struct sock *sk,
 761                                        struct tcp_sock *tp,
 762                                        const struct sk_buff *skb)
 763 {
 764         int orig = tp->packets_out;
 765
 766         tp->packets_out += tcp_skb_pcount(skb);
 767         if (!orig)
 768                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto);
 769 }
 770
 771 static inline void tcp_packets_out_dec(struct tcp_sock *tp,
 772                                        const struct sk_buff *skb)
 773 {
 774         tp->packets_out -= tcp_skb_pcount(skb);
 775 }
 776
 777 /* Events passed to congestion control interface */
 778 enum tcp_ca_event {
 779         CA_EVENT_TX_START,      /* first transmit when no packets in flight */
 780         CA_EVENT_CWND_RESTART,  /* congestion window restart */
 781         CA_EVENT_COMPLETE_CWR,  /* end of congestion recovery */
 782         CA_EVENT_FRTO,          /* fast recovery timeout */
 783         CA_EVENT_LOSS,          /* loss timeout */
 784         CA_EVENT_FAST_ACK,      /* in sequence ack */
 785         CA_EVENT_SLOW_ACK,      /* other ack */
 786 };
 787
 788 /*
 789  * Interface for adding new TCP congestion control handlers
 790  */
 791 #define TCP_CA_NAME_MAX 16
 792 struct tcp_congestion_ops {
 793         struct list_head        list;
 794
 795         /* initialize private data (optional) */
 796         void (*init)(struct tcp_sock *tp);
 797         /* cleanup private data  (optional) */
 798         void (*release)(struct tcp_sock *tp);
 799
 800         /* return slow start threshold (required) */
 801         u32 (*ssthresh)(struct tcp_sock *tp);
 802         /* lower bound for congestion window (optional) */
 803         u32 (*min_cwnd)(struct tcp_sock *tp);
 804         /* do new cwnd calculation (required) */
 805         void (*cong_avoid)(struct tcp_sock *tp, u32 ack,
 806                            u32 rtt, u32 in_flight, int good_ack);
 807         /* round trip time sample per acked packet (optional) */
 808         void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt);
 809         /* call before changing ca_state (optional) */
 810         void (*set_state)(struct tcp_sock *tp, u8 new_state);
 811         /* call when cwnd event occurs (optional) */
 812         void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev);
 813         /* new value of cwnd after loss (optional) */
 814         u32  (*undo_cwnd)(struct tcp_sock *tp);
 815         /* hook for packet ack accounting (optional) */
 816         void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked);
 817         /* get info for tcp_diag (optional) */
 818         void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb);
 819
 820         char            name[TCP_CA_NAME_MAX];
 821         struct module   *owner;
 822 };
 823
 824 extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 825 extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
 826
 827 extern void tcp_init_congestion_control(struct tcp_sock *tp);
 828 extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
 829 extern int tcp_set_default_congestion_control(const char *name);
 830 extern void tcp_get_default_congestion_control(char *name);
 831 extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);
 832
 833 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 834 extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
 835 extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
 836                                 u32 rtt, u32 in_flight, int flag);
 837 extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp);
 838 extern struct tcp_congestion_ops tcp_reno;
 839
 840 static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
 841 {
 842         if (tp->ca_ops->set_state)
 843                 tp->ca_ops->set_state(tp, ca_state);
 844         tp->ca_state = ca_state;
 845 }
 846
 847 static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event)
 848 {
 849         if (tp->ca_ops->cwnd_event)
 850                 tp->ca_ops->cwnd_event(tp, event);
 851 }
 852
 853 /* This determines how many packets are "in the network" to the best
 854  * of our knowledge.  In many cases it is conservative, but where
 855  * detailed information is available from the receiver (via SACK
 856  * blocks etc.) we can make more aggressive calculations.
 857  *
 858  * Use this for decisions involving congestion control, use just
 859  * tp->packets_out to determine if the send queue is empty or not.
 860  *
 861  * Read this equation as:
 862  *
 863  *      "Packets sent once on transmission queue" MINUS
 864  *      "Packets left network, but not honestly ACKed yet" PLUS
 865  *      "Packets fast retransmitted"
 866  */
 867 static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 868 {
 869         return (tp->packets_out - tp->left_out + tp->retrans_out);
 870 }
 871
 872 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
 873  * The exception is rate halving phase, when cwnd is decreasing towards
 874  * ssthresh.
 875  */
 876 static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
 877 {
 878         if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
 879                 return tp->snd_ssthresh;
 880         else
 881                 return max(tp->snd_ssthresh,
 882                            ((tp->snd_cwnd >> 1) +
 883                             (tp->snd_cwnd >> 2)));
 884 }
 885
 886 static inline void tcp_sync_left_out(struct tcp_sock *tp)
 887 {
 888         if (tp->rx_opt.sack_ok &&
 889             (tp->sacked_out >= tp->packets_out - tp->lost_out))
 890                 tp->sacked_out = tp->packets_out - tp->lost_out;
 891         tp->left_out = tp->sacked_out + tp->lost_out;
 892 }
 893
 894 /* Set slow start threshold and cwnd not falling to slow start */
 895 static inline void __tcp_enter_cwr(struct tcp_sock *tp)
 896 {
 897         tp->undo_marker = 0;
 898         tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
 899         tp->snd_cwnd = min(tp->snd_cwnd,
 900                            tcp_packets_in_flight(tp) + 1U);
 901         tp->snd_cwnd_cnt = 0;
 902         tp->high_seq = tp->snd_nxt;
 903         tp->snd_cwnd_stamp = tcp_time_stamp;
 904         TCP_ECN_queue_cwr(tp);
 905 }
 906
 907 static inline void tcp_enter_cwr(struct tcp_sock *tp)
 908 {
 909         tp->prior_ssthresh = 0;
 910         if (tp->ca_state < TCP_CA_CWR) {
 911                 __tcp_enter_cwr(tp);
 912                 tcp_set_ca_state(tp, TCP_CA_CWR);
 913         }
 914 }
 915
 916 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 917
 918 /* Slow start with delack produces 3 packets of burst, so that
 919  * it is safe "de facto".
 920  */
 921 static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
 922 {
 923         return 3;
 924 }
 925
 926 static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
 927                                            const struct sk_buff *skb)
 928 {
 929         if (skb->len < mss)
 930                 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 931 }
 932
 933 static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
 934 {
 935         const struct inet_connection_sock *icsk = inet_csk(sk);
 936         if (!tp->packets_out && !icsk->icsk_pending)
 937                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto);
 938 }
 939
 940 static __inline__ void tcp_push_pending_frames(struct sock *sk,
 941                                                struct tcp_sock *tp)
 942 {
 943         __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
 944 }
 945
 946 static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 947 {
 948         tp->snd_wl1 = seq;
 949 }
 950
 951 static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
 952 {
 953         tp->snd_wl1 = seq;
 954 }
 955
 956 extern void tcp_destroy_sock(struct sock *sk);
 957
 958
 959 /*
 960  * Calculate(/check) TCP checksum
 961  */
 962 static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
 963                                    unsigned long saddr, unsigned long daddr,
 964                                    unsigned long base)
 965 {
 966         return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
 967 }
 968
 969 static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
 970 {
 971         return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
 972 }
 973
 974 static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
 975 {
 976         return skb->ip_summed != CHECKSUM_UNNECESSARY &&
 977                 __tcp_checksum_complete(skb);
 978 }
 979
 980 /* Prequeue for VJ style copy to user, combined with checksumming. */
 981
 982 static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
 983 {
 984         tp->ucopy.task = NULL;
 985         tp->ucopy.len = 0;
 986         tp->ucopy.memory = 0;
 987         skb_queue_head_init(&tp->ucopy.prequeue);
 988 }
 989
 990 /* Packet is added to VJ-style prequeue for processing in process
 991  * context, if a reader task is waiting. Apparently, this exciting
 992  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 993  * failed somewhere. Latency? Burstiness? Well, at least now we will
 994  * see, why it failed. 8)8)                               --ANK
 995  *
 996  * NOTE: is this not too big to inline?
 997  */
 998 static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 999 {
1000         struct tcp_sock *tp = tcp_sk(sk);
1001
1002         if (!sysctl_tcp_low_latency && tp->ucopy.task) {
1003                 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1004                 tp->ucopy.memory += skb->truesize;
1005                 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1006                         struct sk_buff *skb1;
1007
1008                         BUG_ON(sock_owned_by_user(sk));
1009
1010                         while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1011                                 sk->sk_backlog_rcv(sk, skb1);
1012                                 NET_INC_STATS_BH(LINUX_MIB_TCPPREQUEUEDROPPED);
1013                         }
1014
1015                         tp->ucopy.memory = 0;
1016                 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1017                         wake_up_interruptible(sk->sk_sleep);
1018                         if (!inet_csk_ack_scheduled(sk))
1019                                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1020                                                           (3 * TCP_RTO_MIN) / 4);
1021                 }
1022                 return 1;
1023         }
1024         return 0;
1025 }
1026
1027
/* State transition tracing is compiled out; define STATE_TRACE
 * instead of undefining it to enable it.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable names, indexed by socket state in tcp_set_state(). */
static const char *statename[]={
	"Unused",
	"Established",
	"Syn Sent",
	"Syn Recv",
	"Fin Wait 1",
	"Fin Wait 2",
	"Time Wait",
	"Close",
	"Close Wait",
	"Last ACK",
	"Listen",
	"Closing"
};
#endif
1037
1038 static __inline__ void tcp_set_state(struct sock *sk, int state)
1039 {
1040         int oldstate = sk->sk_state;
1041
1042         switch (state) {
1043         case TCP_ESTABLISHED:
1044                 if (oldstate != TCP_ESTABLISHED)
1045                         TCP_INC_STATS(TCP_MIB_CURRESTAB);
1046                 break;
1047
1048         case TCP_CLOSE:
1049                 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1050                         TCP_INC_STATS(TCP_MIB_ESTABRESETS);
1051
1052                 sk->sk_prot->unhash(sk);
1053                 if (inet_csk(sk)->icsk_bind_hash &&
1054                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1055                         inet_put_port(&tcp_hashinfo, sk);
1056                 /* fall through */
1057         default:
1058                 if (oldstate==TCP_ESTABLISHED)
1059                         TCP_DEC_STATS(TCP_MIB_CURRESTAB);
1060         }
1061
1062         /* Change state AFTER socket is unhashed to avoid closed
1063          * socket sitting in hash tables.
1064          */
1065         sk->sk_state = state;
1066
1067 #ifdef STATE_TRACE
1068         SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
1069 #endif
1070 }
1071
1072 static __inline__ void tcp_done(struct sock *sk)
1073 {
1074         tcp_set_state(sk, TCP_CLOSE);
1075         tcp_clear_xmit_timers(sk);
1076
1077         sk->sk_shutdown = SHUTDOWN_MASK;
1078
1079         if (!sock_flag(sk, SOCK_DEAD))
1080                 sk->sk_state_change(sk);
1081         else
1082                 tcp_destroy_sock(sk);
1083 }
1084
1085 static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
1086 {
1087         rx_opt->dsack = 0;
1088         rx_opt->eff_sacks = 0;
1089         rx_opt->num_sacks = 0;
1090 }
1091
1092 static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
1093 {
1094         if (tp->rx_opt.tstamp_ok) {
1095                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1096                                           (TCPOPT_NOP << 16) |
1097                                           (TCPOPT_TIMESTAMP << 8) |
1098                                           TCPOLEN_TIMESTAMP);
1099                 *ptr++ = htonl(tstamp);
1100                 *ptr++ = htonl(tp->rx_opt.ts_recent);
1101         }
1102         if (tp->rx_opt.eff_sacks) {
1103                 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
1104                 int this_sack;
1105
1106                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
1107                                           (TCPOPT_NOP << 16) |
1108                                           (TCPOPT_SACK << 8) |
1109                                           (TCPOLEN_SACK_BASE +
1110                                            (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)));
1111                 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
1112                         *ptr++ = htonl(sp[this_sack].start_seq);
1113                         *ptr++ = htonl(sp[this_sack].end_seq);
1114                 }
1115                 if (tp->rx_opt.dsack) {
1116                         tp->rx_opt.dsack = 0;
1117                         tp->rx_opt.eff_sacks--;
1118                 }
1119         }
1120 }
1121
1122 /* Construct a tcp options header for a SYN or SYN_ACK packet.
1123  * If this is every changed make sure to change the definition of
1124  * MAX_SYN_SIZE to match the new maximum number of options that you
1125  * can generate.
1126  */
1127 static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1128                                              int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1129 {
1130         /* We always get an MSS option.
1131          * The option bytes which will be seen in normal data
1132          * packets should timestamps be used, must be in the MSS
1133          * advertised.  But we subtract them from tp->mss_cache so
1134          * that calculations in tcp_sendmsg are simpler etc.
1135          * So account for this fact here if necessary.  If we
1136          * don't do this correctly, as a receiver we won't
1137          * recognize data packets as being full sized when we
1138          * should, and thus we won't abide by the delayed ACK
1139          * rules correctly.
1140          * SACKs don't matter, we never delay an ACK when we
1141          * have any of those going out.
1142          */
1143         *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1144         if (ts) {
1145                 if(sack)
1146                         *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1147                                                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1148                 else
1149                         *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1150                                                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1151                 *ptr++ = htonl(tstamp);         /* TSVAL */
1152                 *ptr++ = htonl(ts_recent);      /* TSECR */
1153         } else if(sack)
1154                 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1155                                           (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1156         if (offer_wscale)
1157                 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1158 }
1159
1160 /* Determine a window scaling and initial window to offer. */
1161 extern void tcp_select_initial_window(int __space, __u32 mss,
1162                                       __u32 *rcv_wnd, __u32 *window_clamp,
1163                                       int wscale_ok, __u8 *rcv_wscale);
1164
1165 static inline int tcp_win_from_space(int space)
1166 {
1167         return sysctl_tcp_adv_win_scale<=0 ?
1168                 (space>>(-sysctl_tcp_adv_win_scale)) :
1169                 space - (space>>sysctl_tcp_adv_win_scale);
1170 }
1171
1172 /* Note: caller must be prepared to deal with negative returns */
1173 static inline int tcp_space(const struct sock *sk)
1174 {
1175         return tcp_win_from_space(sk->sk_rcvbuf -
1176                                   atomic_read(&sk->sk_rmem_alloc));
1177 }
1178
1179 static inline int tcp_full_space(const struct sock *sk)
1180 {
1181         return tcp_win_from_space(sk->sk_rcvbuf);
1182 }
1183
1184 static inline void inet_csk_reqsk_queue_add(struct sock *sk,
1185                                             struct request_sock *req,
1186                                             struct sock *child)
1187 {
1188         reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
1189 }
1190
1191 static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
1192                                                 struct request_sock *req)
1193 {
1194         if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
1195                 inet_csk_delete_keepalive_timer(sk);
1196 }
1197
1198 static inline void inet_csk_reqsk_queue_added(struct sock *sk,
1199                                               const unsigned long timeout)
1200 {
1201         if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
1202                 inet_csk_reset_keepalive_timer(sk, timeout);
1203 }
1204
1205 static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
1206 {
1207         return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
1208 }
1209
1210 static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
1211 {
1212         return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
1213 }
1214
1215 static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
1216 {
1217         return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
1218 }
1219
1220 static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
1221                                                struct request_sock *req,
1222                                                struct request_sock **prev)
1223 {
1224         reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
1225 }
1226
/* Drop @req: unlink it from @sk's accept queue, update the queue
 * accounting, then free the request.
 */
static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
					     struct request_sock *req,
					     struct request_sock **prev)
{
	/* The request must stay valid until both queue steps are done. */
	inet_csk_reqsk_queue_unlink(sk, req, prev);
	inet_csk_reqsk_queue_removed(sk, req);

	reqsk_free(req);
}
1235
1236 static __inline__ void tcp_openreq_init(struct request_sock *req,
1237                                         struct tcp_options_received *rx_opt,
1238                                         struct sk_buff *skb)
1239 {
1240         struct inet_request_sock *ireq = inet_rsk(req);
1241
1242         req->rcv_wnd = 0;               /* So that tcp_send_synack() knows! */
1243         tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
1244         req->mss = rx_opt->mss_clamp;
1245         req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
1246         ireq->tstamp_ok = rx_opt->tstamp_ok;
1247         ireq->sack_ok = rx_opt->sack_ok;
1248         ireq->snd_wscale = rx_opt->snd_wscale;
1249         ireq->wscale_ok = rx_opt->wscale_ok;
1250         ireq->acked = 0;
1251         ireq->ecn_ok = 0;
1252         ireq->rmt_port = skb->h.th->source;
1253 }
1254
1255 extern void tcp_enter_memory_pressure(void);
1256
1257 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1258 {
1259         return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1260 }
1261
1262 static inline int keepalive_time_when(const struct tcp_sock *tp)
1263 {
1264         return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1265 }
1266
1267 static inline int tcp_fin_time(const struct sock *sk)
1268 {
1269         int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
1270         const int rto = inet_csk(sk)->icsk_rto;
1271
1272         if (fin_timeout < (rto << 2) - (rto >> 1))
1273                 fin_timeout = (rto << 2) - (rto >> 1);
1274
1275         return fin_timeout;
1276 }
1277
1278 static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
1279 {
1280         if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
1281                 return 0;
1282         if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
1283                 return 0;
1284
1285         /* RST segments are not recommended to carry timestamp,
1286            and, if they do, it is recommended to ignore PAWS because
1287            "their cleanup function should take precedence over timestamps."
1288            Certainly, it is mistake. It is necessary to understand the reasons
1289            of this constraint to relax it: if peer reboots, clock may go
1290            out-of-sync and half-open connections will not be reset.
1291            Actually, the problem would be not existing if all
1292            the implementations followed draft about maintaining clock
1293            via reboots. Linux-2.2 DOES NOT!
1294
1295            However, we can relax time bounds for RST segments to MSL.
1296          */
1297         if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1298                 return 0;
1299         return 1;
1300 }
1301
/* Expands to an empty statement; @sk is not evaluated. */
#define TCP_CHECK_TIMER(sk)	do { } while (0)
1303
1304 static inline int tcp_use_frto(const struct sock *sk)
1305 {
1306         const struct tcp_sock *tp = tcp_sk(sk);
1307
1308         /* F-RTO must be activated in sysctl and there must be some
1309          * unsent new data, and the advertised window should allow
1310          * sending it.
1311          */
1312         return (sysctl_tcp_frto && sk->sk_send_head &&
1313                 !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
1314                        tp->snd_una + tp->snd_wnd));
1315 }
1316
1317 static inline void tcp_mib_init(void)
1318 {
1319         /* See RFC 2012 */
1320         TCP_ADD_STATS_USER(TCP_MIB_RTOALGORITHM, 1);
1321         TCP_ADD_STATS_USER(TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
1322         TCP_ADD_STATS_USER(TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
1323         TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
1324 }
1325
/* /proc support: states recorded in tcp_iter_state.state. */
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING = 0,
	TCP_SEQ_STATE_OPENREQ,
	TCP_SEQ_STATE_ESTABLISHED,
	TCP_SEQ_STATE_TIME_WAIT,
};
1333
1334 struct tcp_seq_afinfo {
1335         struct module           *owner;
1336         char                    *name;
1337         sa_family_t             family;
1338         int                     (*seq_show) (struct seq_file *m, void *v);
1339         struct file_operations  *seq_fops;
1340 };
1341
1342 struct tcp_iter_state {
1343         sa_family_t             family;
1344         enum tcp_seq_states     state;
1345         struct sock             *syn_wait_sk;
1346         int                     bucket, sbucket, num, uid;
1347         struct seq_operations   seq_ops;
1348 };
1349
1350 extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
1351 extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
1352
1353 #endif  /* _TCP_H */