Monday, April 25, 2016

TCP reloaded (part 3)

Exploratory Analysis - step #2

In this post I continue the exploratory analysis of the data generated in the first series of TCP posts. To that end, I load the data back.

suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Rmisc))
suppressPackageStartupMessages(library(knitr))

# Load the data set saved by the earlier posts; keep text columns as character.
df <- read.csv(
  file = "TCP_ea.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Report the number of rows and columns.
dim(df)
## [1] 2501   10
# Show the first five rows as a formatted table.
kable(head(df, n = 5))
time W R q x p S status S_slope S_slope_rate
0.00 2 0.2000000 0.0 0.0000000 0 10.000000 slowstart decrease high
0.02 2 0.2066667 0.2 0.0000000 0 9.677419 slowstart increase high
0.04 2 0.2000000 0.0 0.0015038 0 10.000000 slowstart decrease high
0.06 2 0.2066667 0.2 0.0014925 0 9.677419 slowstart increase high
0.08 2 0.2000000 0.0 0.0029851 0 10.000000 slowstart decrease high
# Show the last five rows as a formatted table.
kable(tail(df, n = 5))
time W R q x p S status S_slope S_slope_rate
2497 49.92 27.41972 0.9893418 23.68025 21.86931 0.0463816 27.71512 ca_steadystate decrease high
2498 49.94 27.25502 0.9878185 23.63456 21.88293 0.0467328 27.59112 ca_steadystate decrease high
2499 49.96 27.08724 0.9862126 23.58638 21.89610 0.0470732 27.46593 ca_steadystate decrease high
2500 49.98 26.91647 0.9845232 23.53570 21.90881 0.0474025 27.33960 ca_steadystate decrease high
2501 50.00 26.74279 0.9827496 23.48249 21.92104 0.0477203 27.21221 ca_steadystate decrease high

Below are summaries of the TCP variables \(\{W(t), R(t), q(t), x(q(t)), p(x(t)), S(t)\}\) conditioned on the TCP status value: {slow start, congestion avoidance transient state, congestion avoidance steady state}.

# Summary statistics of W within each TCP status.
w_stats <- aggregate(W ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into W.Min., W.Mean, ... columns.
kable(data.frame(status = w_stats[["status"]], W = w_stats[["W"]]))
status W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate 10.77 17.18 22.920 22.120 27.47 30.86
ca_transient 10.59 18.02 23.270 22.600 27.59 30.97
slowstart 2.00 2.00 3.774 4.528 6.25 10.54
# Box plot of W per status, one fill colour per status.
ggplot(df, aes(x = status, y = W, fill = status)) +
  geom_boxplot()

# Summary statistics of R within each TCP status.
r_stats <- aggregate(R ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into R.Min., R.Mean, ... columns.
kable(data.frame(status = r_stats[["status"]], R = r_stats[["R"]]))
status R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate 0.3949 0.5704 0.7477 0.7292 0.8967 1.0040
ca_transient 0.2592 0.5993 0.7725 0.7423 0.9089 1.0080
slowstart 0.2000 0.2000 0.2067 0.2109 0.2175 0.2512
# Box plot of R per status, one fill colour per status.
ggplot(df, aes(x = status, y = R, fill = status)) +
  geom_boxplot()

# Summary statistics of q within each TCP status.
q_stats <- aggregate(q ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into q.Min., q.Mean, ... columns.
kable(data.frame(status = q_stats[["status"]], q = q_stats[["q"]]))
status q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate 5.848 11.11 16.43 15.8800 20.9000 24.130
ca_transient 1.775 11.98 17.18 16.2700 21.2700 24.240
slowstart 0.000 0.00 0.20 0.3257 0.5246 1.536
# Box plot of q per status, one fill colour per status.
ggplot(df, aes(x = status, y = q, fill = status)) +
  geom_boxplot()

# Summary statistics of x within each TCP status.
x_stats <- aggregate(x ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into x.Min., x.Mean, ... columns.
kable(data.frame(status = x_stats[["status"]], x = x_stats[["x"]]))
status x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate 11.03000 12.340000 15.40000 15.79000 18.99000 22.04000
ca_transient 0.08942 7.205000 13.81000 12.91000 19.09000 22.10000
slowstart 0.00000 0.006562 0.01458 0.02079 0.02612 0.07846
# Box plot of x per status, one fill colour per status.
ggplot(df, aes(x = status, y = x, fill = status)) +
  geom_boxplot()

From the last two tables above, the average queue length and its average EWMA are both below the RED minimum threshold, which is equal to 20.

# Summary statistics of p within each TCP status.
p_stats <- aggregate(p ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into p.Min., p.Mean, ... columns.
kable(data.frame(status = p_stats[["status"]], p = p_stats[["p"]]))
status p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate 0 0 0 0.004996 0 0.05110
ca_transient 0 0 0 0.006827 0 0.05254
slowstart 0 0 0 0.000000 0 0.00000
# Box plot of p per status, one fill colour per status.
ggplot(df, aes(x = status, y = p, fill = status)) +
  geom_boxplot()

# Summary statistics of S within each TCP status.
s_stats <- aggregate(S ~ status, data = df, FUN = summary)
# The summary matrix column is expanded into S.Min., S.Mean, ... columns.
kable(data.frame(status = s_stats[["status"]], S = s_stats[["S"]]))
status S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate 20.110 31.00 31.17 30.30 31.47 31.96
ca_transient 20.110 31.03 31.22 30.73 31.58 40.86
slowstart 9.677 10.00 18.66 20.95 29.15 41.95
# Box plot of S per status, one fill colour per status.
ggplot(df, aes(x = status, y = S, fill = status)) +
  geom_boxplot()

From the last table above, the average TCP transmission rate stays close to the link capacity. Moreover, the dispersion of the S values is far smaller than that of W, q and R.

Below are summaries of the TCP variables \(\{W(t), R(t), q(t), x(q(t)), p(x(t)), S(t)\}\) conditioned on both the status and the TCP transmission rate slope {decrease, increase}.

# Summary statistics of W for each (status, S_slope) combination.
w_by_slope <- aggregate(W ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = w_by_slope[["status"]],
  S_slope = w_by_slope[["S_slope"]],
  W = w_by_slope[["W"]]
))
status S_slope W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate decrease 13.59 19.950 24.390 23.850 28.050 30.860
ca_transient decrease 10.59 18.020 23.270 22.600 27.590 30.970
slowstart decrease 2.00 2.000 2.000 3.827 4.381 10.540
ca_steadystate increase 10.77 11.360 12.210 12.340 13.210 14.610
slowstart increase 2.00 2.444 4.068 4.739 6.711 9.963
# Summary statistics of R for each (status, S_slope) combination.
r_by_slope <- aggregate(R ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = r_by_slope[["status"]],
  S_slope = r_by_slope[["S_slope"]],
  R = r_by_slope[["R"]]
))
status S_slope R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate decrease 0.4502 0.6512 0.7957 0.7773 0.9169 1.0040
ca_transient decrease 0.2592 0.5993 0.7725 0.7423 0.9089 1.0080
slowstart decrease 0.2000 0.2000 0.2000 0.2057 0.2000 0.2512
ca_steadystate increase 0.3949 0.4075 0.4298 0.4577 0.4771 0.6625
slowstart increase 0.2000 0.2054 0.2085 0.2124 0.2181 0.2440
# Summary statistics of q for each (status, S_slope) combination.
q_by_slope <- aggregate(q ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = q_by_slope[["status"]],
  S_slope = q_by_slope[["S_slope"]],
  q = q_by_slope[["q"]]
))
status S_slope q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate decrease 7.506 13.540 17.870 17.3200 21.510 24.130
ca_transient decrease 1.775 11.980 17.180 16.2700 21.270 24.240
slowstart decrease 0.000 0.000 0.000 0.1707 0.000 1.536
ca_steadystate increase 5.848 6.226 6.893 7.7300 8.312 13.880
slowstart increase 0.000 0.163 0.254 0.3722 0.542 1.320
# Summary statistics of x for each (status, S_slope) combination.
x_by_slope <- aggregate(x ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = x_by_slope[["status"]],
  S_slope = x_by_slope[["S_slope"]],
  x = x_by_slope[["x"]]
))
status S_slope x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate decrease 11.03000 12.010000 15.130000 15.66000 19.06000 22.04000
ca_transient decrease 0.08942 7.205000 13.810000 12.91000 19.09000 22.10000
slowstart decrease 0.00000 0.002985 0.005882 0.01671 0.01831 0.07846
ca_steadystate increase 12.71000 14.180000 16.280000 16.49000 18.77000 20.81000
slowstart increase 0.00000 0.008643 0.015950 0.02201 0.03093 0.06906
# Summary statistics of p for each (status, S_slope) combination.
p_by_slope <- aggregate(p ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = p_by_slope[["status"]],
  S_slope = p_by_slope[["S_slope"]],
  p = p_by_slope[["p"]]
))
status S_slope p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate decrease 0 0 0 0.005630 0 0.05110
ca_transient decrease 0 0 0 0.006827 0 0.05254
slowstart decrease 0 0 0 0.000000 0 0.00000
ca_steadystate increase 0 0 0 0.001416 0 0.02166
slowstart increase 0 0 0 0.000000 0 0.00000
# Summary statistics of S for each (status, S_slope) combination.
s_by_slope <- aggregate(S ~ status + S_slope, data = df, FUN = summary)
kable(data.frame(
  status = s_by_slope[["status"]],
  S_slope = s_by_slope[["S_slope"]],
  S = s_by_slope[["S"]]
))
status S_slope S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate decrease 20.320 31.03 31.19 30.79 31.46 31.96
ca_transient decrease 20.110 31.03 31.22 30.73 31.58 40.86
slowstart decrease 10.000 10.00 10.00 17.94 21.90 41.95
ca_steadystate increase 20.110 22.92 29.42 27.51 31.65 31.96
slowstart increase 9.677 12.09 19.50 21.85 30.87 40.83

Below are summaries of the TCP variables \(\{W(t), R(t), q(t), x(q(t)), p(x(t)), S(t)\}\) conditioned on status, slope and slope rate level {high, low}.

# Summary statistics of W for each (status, S_slope, S_slope_rate) combination.
w_by_rate <- aggregate(W ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = w_by_rate[["status"]],
  S_slope = w_by_rate[["S_slope"]],
  S_slope_rate = w_by_rate[["S_slope_rate"]],
  W = w_by_rate[["W"]]
))
status S_slope S_slope_rate W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate decrease high 13.87 21.310 27.740 25.510 30.120 30.860
ca_transient decrease high 10.59 12.910 17.290 19.450 26.730 30.970
slowstart decrease high 2.00 2.000 2.000 3.827 4.381 10.540
ca_steadystate increase high 10.77 11.240 11.870 11.990 12.690 13.760
slowstart increase high 2.00 2.444 4.068 4.739 6.711 9.963
ca_steadystate decrease low 13.59 19.870 24.080 23.630 27.670 30.830
ca_transient decrease low 13.32 19.600 23.980 23.490 27.680 30.950
ca_steadystate increase low 13.05 13.840 14.060 14.020 14.270 14.610
# Summary statistics of R for each (status, S_slope, S_slope_rate) combination.
r_by_rate <- aggregate(R ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = r_by_rate[["status"]],
  S_slope = r_by_rate[["S_slope"]],
  S_slope_rate = r_by_rate[["S_slope_rate"]],
  R = r_by_rate[["R"]]
))
status S_slope S_slope_rate R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate decrease high 0.6819 0.9015 0.9921 0.9396 1.0010 1.0040
ca_transient decrease high 0.2592 0.3963 0.8099 0.7081 0.9858 1.0080
slowstart decrease high 0.2000 0.2000 0.2000 0.2057 0.2000 0.2512
ca_steadystate increase high 0.3949 0.4054 0.4208 0.4567 0.4941 0.6496
slowstart increase high 0.2000 0.2054 0.2085 0.2124 0.2181 0.2440
ca_steadystate decrease low 0.4502 0.6315 0.7709 0.7564 0.8901 0.9955
ca_transient decrease low 0.4338 0.6236 0.7675 0.7520 0.8907 0.9992
ca_steadystate increase low 0.4270 0.4362 0.4429 0.4622 0.4496 0.6625
# Summary statistics of q for each (status, S_slope, S_slope_rate) combination.
q_by_rate <- aggregate(q ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = q_by_rate[["status"]],
  S_slope = q_by_rate[["S_slope"]],
  S_slope_rate = q_by_rate[["S_slope_rate"]],
  q = q_by_rate[["q"]]
))
status S_slope S_slope_rate q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate decrease high 14.460 21.050 23.760 22.1900 24.040 24.130
ca_transient decrease high 1.775 5.888 18.300 15.2400 23.580 24.240
slowstart decrease high 0.000 0.000 0.000 0.1707 0.000 1.536
ca_steadystate increase high 5.848 6.161 6.625 7.7020 8.823 13.490
slowstart increase high 0.000 0.163 0.254 0.3722 0.542 1.320
ca_steadystate decrease low 7.506 12.950 17.130 16.6900 20.700 23.860
ca_transient decrease low 7.013 12.710 17.030 16.5600 20.720 23.980
ca_steadystate increase low 6.810 7.086 7.287 7.8660 7.487 13.880
# Summary statistics of x for each (status, S_slope, S_slope_rate) combination.
x_by_rate <- aggregate(x ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = x_by_rate[["status"]],
  S_slope = x_by_rate[["S_slope"]],
  S_slope_rate = x_by_rate[["S_slope_rate"]],
  x = x_by_rate[["x"]]
))
status S_slope S_slope_rate x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate decrease high 20.95000 21.380000 21.680000 21.64000 21.91000 22.04000
ca_transient decrease high 0.08942 1.333000 21.380000 13.87000 21.90000 22.10000
slowstart decrease high 0.00000 0.002985 0.005882 0.01671 0.01831 0.07846
ca_steadystate increase high 13.52000 15.050000 16.980000 17.04000 19.02000 20.71000
slowstart increase high 0.00000 0.008643 0.015950 0.02201 0.03093 0.06906
ca_steadystate decrease low 11.03000 11.820000 14.240000 14.89000 17.72000 21.07000
ca_transient decrease low 2.17400 8.040000 13.170000 12.63000 17.47000 21.10000
ca_steadystate increase low 12.71000 12.910000 13.130000 13.83000 13.37000 20.81000
# Summary statistics of p for each (status, S_slope, S_slope_rate) combination.
p_by_rate <- aggregate(p ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = p_by_rate[["status"]],
  S_slope = p_by_rate[["S_slope"]],
  S_slope_rate = p_by_rate[["S_slope_rate"]],
  p = p_by_rate[["p"]]
))
status S_slope S_slope_rate p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate decrease high 0.02506 0.0345 0.04183 0.040800 0.04751 0.05110
ca_transient decrease high 0.00000 0.0000 0.03441 0.026770 0.04753 0.05254
slowstart decrease high 0.00000 0.0000 0.00000 0.000000 0.00000 0.00000
ca_steadystate increase high 0.00000 0.0000 0.00000 0.001299 0.00000 0.01901
slowstart increase high 0.00000 0.0000 0.00000 0.000000 0.00000 0.00000
ca_steadystate decrease low 0.00000 0.0000 0.00000 0.001106 0.00000 0.02631
ca_transient decrease low 0.00000 0.0000 0.00000 0.001138 0.00000 0.02701
ca_steadystate increase low 0.00000 0.0000 0.00000 0.001989 0.00000 0.02166
# Summary statistics of S for each (status, S_slope, S_slope_rate) combination.
s_by_rate <- aggregate(S ~ status + S_slope + S_slope_rate, data = df,
                       FUN = summary)
kable(data.frame(
  status = s_by_rate[["status"]],
  S_slope = s_by_rate[["S_slope"]],
  S_slope_rate = s_by_rate[["S_slope_rate"]],
  S = s_by_rate[["S"]]
))
status S_slope S_slope_rate S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate decrease high 20.340 23.64 27.96 26.83 30.00 30.97
ca_transient decrease high 20.120 24.20 29.81 28.71 32.57 40.86
slowstart decrease high 10.000 10.00 10.00 17.94 21.90 41.95
ca_steadystate increase high 20.120 22.33 27.97 26.84 31.05 31.88
slowstart increase high 9.677 12.09 19.50 21.85 30.87 40.83
ca_steadystate decrease low 20.320 31.08 31.24 31.30 31.50 31.96
ca_transient decrease low 20.110 31.08 31.25 31.30 31.52 32.20
ca_steadystate increase low 20.110 31.89 31.92 30.80 31.93 31.96

In the next post I will show phase diagrams that shed some light on the strengths and weaknesses of TCP performance.