Tail loss probe
https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.347
diff -d -u -r1.347 tcp_input.c
--- sys/netinet/tcp_input.c 10 Jun 2016 13:31:44 -0000 1.347
+++ sys/netinet/tcp_input.c 25 Jul 2016 13:13:34 -0000
@@ -1998,6 +1998,11 @@
tp->ts_recent_age = tcp_now;
tp->ts_recent = opti.ts_val;
}
+ /*
+ * Disable Tail Loss Probe timer
+ */
+ if (TCP_TIMER_ISARMED(tp, TCPT_TLP))
+ TCP_TIMER_DISARM(tp, TCPT_TLP);
if (tlen == 0) {
/* Ack prediction. */
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v
retrieving revision 1.186
diff -d -u -r1.186 tcp_output.c
--- sys/netinet/tcp_output.c 10 Jun 2016 13:27:16 -0000 1.186
+++ sys/netinet/tcp_output.c 25 Jul 2016 13:13:34 -0000
@@ -578,6 +578,11 @@
int sigoff = 0;
#endif
uint64_t *tcps;
+ uint64_t flightsize;
+
+ /* but first things first - disarm the TLP timer */
+ if (TCP_TIMER_ISARMED(tp, TCPT_TLP))
+ TCP_TIMER_DISARM(tp, TCPT_TLP);
#ifdef DIAGNOSTIC
if (tp->t_inpcb && tp->t_in6pcb)
@@ -676,6 +681,7 @@
} else {
if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) {
/*
+ * RFC5681 Section 4.1.
* We have been idle for "a while" and no acks are
* expected to clock out any data we send --
* slow start to get ack "clock" running again.
@@ -1526,6 +1532,46 @@
}
}
+timer:
+ flightsize = (tp->snd_nxt - tp->sack_newdata) + sack_bytes_rxmt;
+ if (tcp_tlp_enabled != 0 &&
+ TCPS_HAVEESTABLISHED(tp->t_state) &&
+ TCP_SACK_ENABLED(tp) &&
+ len != 0 &&
+ /* only if we've transmitted everything or cwnd is reached; the
+  * sum form cannot wrap when flightsize exceeds snd_cwnd */
+ (off + len == so->so_snd.sb_cc ||
+ flightsize + tp->t_segsz > tp->snd_cwnd) &&
+ tp->snd_tlp_nxt != tp->snd_nxt) {
+ int pto;
+ /*
+ * Arm the timer for tail loss probe
+ * Draft dukkipati Section 2.1
+ * FlightSize > 1: schedule PTO in max(2*SRTT, 10ms)
+ * tp->t_srtt >> 5 is the SRTT expressed in hz units
+ */
+ if (flightsize > tp->t_segsz)
+ pto = max(tp->t_srtt >> 4, mstohz(10));
+ /*
+ * FlightSize == 1:
+ * schedule PTO in max(2*SRTT, 1.5*SRTT+WCDelAckT).
+ */
+ else
+ pto = max(tp->t_srtt >> 4,
+ 3 * (tp->t_srtt >> 6) + mstohz(200));
+ /*
+ * If the RTO would fire first, the pending REXMT timer is
+ * replaced by a probe at RTO time: PTO = min(RTO, PTO)
+ */
+ if (pto >= TCP_REXMTVAL(tp) * hz / PR_SLOWHZ &&
+ TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
+ TCP_TIMER_DISARM(tp, TCPT_REXMT);
+ pto = TCP_REXMTVAL(tp) * hz / PR_SLOWHZ;
+ }
+ tp->snd_tlp_nxt = tp->snd_nxt;
+ TCP_TIMER_ARM_HZ(tp, TCPT_TLP, pto > 0 ? pto : 1);
+ }
+
/*
* Set retransmit timer if not currently set,
* and not doing an ack or a keep-alive probe.
@@ -1534,10 +1580,10 @@
* Initialize shift counter which is used for backoff
* of retransmit time.
*/
-timer:
if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) {
- if ((sack_rxmit && tp->snd_nxt != tp->snd_max)
- || tp->snd_nxt != tp->snd_una) {
+ if (((sack_rxmit && tp->snd_nxt != tp->snd_max)
+ || tp->snd_nxt != tp->snd_una) &&
+ TCP_TIMER_ISARMED(tp, TCPT_TLP) == 0) {
if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) {
TCP_TIMER_DISARM(tp, TCPT_PERSIST);
tp->t_rxtshift = 0;
Index: sys/netinet/tcp_seq.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_seq.h,v
retrieving revision 1.17
diff -d -u -r1.17 tcp_seq.h
--- sys/netinet/tcp_seq.h 25 Jul 2014 17:53:59 -0000 1.17
+++ sys/netinet/tcp_seq.h 25 Jul 2016 13:13:34 -0000
@@ -58,7 +58,8 @@
#define tcp_sendseqinit(tp) \
(tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
- (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = (tp)->iss
+ (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = \
+ (tp)->snd_tlp_nxt = (tp)->iss
#define TCP_ISS_RANDOM_MASK 0x00ffffff /* bits of randomness in a TCP ISS */
#define TCP_ISSINCR 0x01000000 /* increment per time and per conn */
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.90
diff -d -u -r1.90 tcp_timer.c
--- sys/netinet/tcp_timer.c 26 Apr 2016 08:44:44 -0000 1.90
+++ sys/netinet/tcp_timer.c 25 Jul 2016 13:13:34 -0000
@@ -149,6 +149,8 @@
int tcp_maxpersistidle = 0; /* max idle time in persist */
+u_int tcp_tlp_enabled = 0; /* nonzero enables tail loss probe (sysctl "tlp") */
+
/*
* Time to delay the ACK. This is initialized in tcp_init(), unless
* its patched.
@@ -159,12 +161,14 @@
void tcp_timer_persist(void *);
void tcp_timer_keep(void *);
void tcp_timer_2msl(void *);
+void tcp_timer_tlp(void *);
const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
tcp_timer_rexmt,
tcp_timer_persist,
tcp_timer_keep,
tcp_timer_2msl,
+ tcp_timer_tlp,
};
/*
@@ -658,3 +662,61 @@
mutex_exit(softnet_lock);
KERNEL_UNLOCK_ONE(NULL);
}
+/* TCPT_TLP callout handler: if a probe is still warranted, force tcp_output(). */
+void
+tcp_timer_tlp(void *arg)
+{
+ struct tcpcb *tp = arg;
+#ifdef INET
+ struct inpcb *inp;
+#endif
+#ifdef INET6
+ struct in6pcb *in6p;
+#endif
+ struct socket *so = NULL;
+
+ mutex_enter(softnet_lock);
+ if ((tp->t_flags & TF_DEAD) != 0 ||
+ TCPS_HAVEESTABLISHED(tp->t_state) == 0) {
+ mutex_exit(softnet_lock);
+ return;
+ }
+ if (!callout_expired(&tp->t_timer[TCPT_TLP])) {
+ mutex_exit(softnet_lock);
+ return;
+ }
+
+ /* Do a tail loss probe rxmit; the kernel lock is held around tcp_output() */
+ KERNEL_LOCK(1, NULL);
+ /* probe if snd_nxt is unchanged since arming, or REXMT is not armed */
+ if (tp->snd_tlp_nxt == tp->snd_nxt ||
+ TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) {
+
+ TCP_STATINC(TCP_STAT_TLP);
+ /* Free the SACK holes and pull snd_fack back to snd_una. */
+ tcp_free_sackholes(tp);
+ tp->snd_fack = tp->snd_una;
+ /* Look up the socket through whichever pcb is attached. */
+#ifdef INET
+ inp = tp->t_inpcb;
+ if (inp)
+ so = inp->inp_socket;
+#endif
+#ifdef INET6
+ in6p = tp->t_in6pcb;
+ if (in6p)
+ so = in6p->in6p_socket;
+#endif
+ KASSERT(so != NULL);
+ /* send buffer holds nothing beyond snd_tlp_nxt: rewind snd_nxt to snd_una */
+ if (tp->snd_tlp_nxt - tp->snd_una == so->so_snd.sb_cc)
+ tp->snd_nxt = tp->snd_una;
+ /* t_force is set only for the duration of this tcp_output() call. */
+ tp->t_force = 1;
+ (void) tcp_output(tp);
+ tp->t_force = 0;
+ }
+
+ KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(softnet_lock);
+}
Index: sys/netinet/tcp_timer.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.28
diff -d -u -r1.28 tcp_timer.h
--- sys/netinet/tcp_timer.h 24 May 2011 18:37:52 -0000 1.28
+++ sys/netinet/tcp_timer.h 25 Jul 2016 13:13:34 -0000
@@ -69,12 +69,13 @@
* Definitions of the TCP timers. These timers are counted
* down PR_SLOWHZ times a second.
*/
-#define TCPT_NTIMERS 4
+#define TCPT_NTIMERS 5
#define TCPT_REXMT 0 /* retransmit */
#define TCPT_PERSIST 1 /* retransmit persistance */
#define TCPT_KEEP 2 /* keep alive */
#define TCPT_2MSL 3 /* 2*msl quiet time timer */
+#define TCPT_TLP 4 /* tail loss probe */
/*
* The TCPT_REXMT timer is used to force retransmissions.
@@ -110,6 +111,9 @@
* an ack segment in response from the peer. If, despite the TCPT_KEEP
* initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
* amount of time probing, then we drop the connection.
+ *
+ * The TCPT_TLP timer is used to trigger retransmissions on short streams
+ * that would otherwise have no chance to rexmit a segment until the RTO.
*/
/*
@@ -141,7 +145,7 @@
#ifdef TCPTIMERS
const char *tcptimers[] =
- { "REXMT", "PERSIST", "KEEP", "2MSL" };
+ { "REXMT", "PERSIST", "KEEP", "2MSL", "TLP" };
#endif
/*
@@ -159,6 +163,13 @@
callout_schedule(&(tp)->t_timer[(timer)], \
(nticks) * (hz / PR_SLOWHZ))
+/*
+ * Arm the timer for nticks hz units, handed to callout_schedule() unscaled
+ */
+#define TCP_TIMER_ARM_HZ(tp, timer, nticks) \
+ callout_schedule(&(tp)->t_timer[(timer)], \
+ (nticks))
+
#define TCP_TIMER_DISARM(tp, timer) \
callout_stop(&(tp)->t_timer[(timer)])
@@ -188,6 +199,7 @@
extern int tcp_maxpersistidle; /* max idle time in persist */
extern int tcp_ttl; /* time to live for TCP segs */
extern const int tcp_backoff[];
+extern u_int tcp_tlp_enabled; /* tail loss probe enabled */
void tcp_timer_init(void);
#endif
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.212
diff -d -u -r1.212 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c 26 Apr 2016 08:44:45 -0000 1.212
+++ sys/netinet/tcp_usrreq.c 25 Jul 2016 13:13:34 -0000
@@ -2184,6 +2184,13 @@
sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+ CTLTYPE_INT, "tlp",
+ SYSCTL_DESCR("Enable tail loss probe"),
+ NULL, 0, &tcp_tlp_enabled, 0,
+ CTL_NET, pf, IPPROTO_TCP, TCPCTL_TLP, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
CTLTYPE_INT, "slowhz",
SYSCTL_DESCR("Keepalive ticks per second"),
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v
retrieving revision 1.177
diff -d -u -r1.177 tcp_var.h
--- sys/netinet/tcp_var.h 14 Feb 2015 22:09:53 -0000 1.177
+++ sys/netinet/tcp_var.h 25 Jul 2016 13:13:34 -0000
@@ -235,6 +235,7 @@
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_nxt; /* send next */
tcp_seq snd_up; /* send urgent pointer */
+ tcp_seq snd_tlp_nxt; /* snd_nxt when the TLP timer was last armed */
tcp_seq snd_wl1; /* window update seg seq number */
tcp_seq snd_wl2; /* window update seg ack number */
tcp_seq iss; /* initial send sequence number */
@@ -710,8 +711,9 @@
#define TCP_STAT_ECN_SHS 73 /* # of successful ECN handshakes */
#define TCP_STAT_ECN_CE 74 /* # of packets with CE bit */
#define TCP_STAT_ECN_ECT 75 /* # of packets with ECT(0) bit */
+#define TCP_STAT_TLP 76 /* # of tail loss probes sent */
-#define TCP_NSTATS 76
+#define TCP_NSTATS 77
/*
* Names for TCP sysctl objects.
@@ -754,7 +756,8 @@
#define TCPCTL_DEBX 32 /* # of tcp debug sockets */
#define TCPCTL_DROP 33 /* drop tcp connection */
#define TCPCTL_MSL 34 /* Max Segment Life */
-#define TCPCTL_MAXID 35
+#define TCPCTL_TLP 35 /* Tail Loss Probe */
+#define TCPCTL_MAXID 36
#define TCPCTL_NAMES { \
{ 0, 0 }, \
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /cvsroot/src/usr.bin/netstat/inet.c,v
retrieving revision 1.106
diff -d -u -r1.106 inet.c
--- usr.bin/netstat/inet.c 8 Feb 2015 15:09:45 -0000 1.106
+++ usr.bin/netstat/inet.c 25 Jul 2016 13:13:36 -0000
@@ -503,6 +503,7 @@
p(TCP_STAT_ECN_SHS, "\t%" PRIu64 " successful ECN handshake%s\n");
p(TCP_STAT_ECN_CE, "\t%" PRIu64 " packet%s with ECN CE bit\n");
p(TCP_STAT_ECN_ECT, "\t%" PRIu64 " packet%s ECN ECT(0) bit\n");
+ p(TCP_STAT_TLP, "\t%" PRIu64 " tail loss probe%s\n");
#undef p
#undef ps
#undef p2