Processing math: 100%

Monday, April 25, 2016

TCP reloaded (part 3)

Exploratory Analysis - step #2

In this post I continue the exploratory analysis of the data generated in the first series of TCP posts. To that end, I load the data back in.

suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(Rmisc))
suppressPackageStartupMessages(library(knitr))

# Read the saved exploratory-analysis data set, keeping text columns as
# character vectors, then print its dimensions (rows, columns).
df <- read.csv(
  file = "TCP_ea.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
dim(df)
## [1] 2501   10
# Render the first five rows as a table.
kable(head(df, n = 5))
time W R q x p S status S_slope S_slope_rate
0.00 2 0.2000000 0.0 0.0000000 0 10.000000 slowstart decrease high
0.02 2 0.2066667 0.2 0.0000000 0 9.677419 slowstart increase high
0.04 2 0.2000000 0.0 0.0015038 0 10.000000 slowstart decrease high
0.06 2 0.2066667 0.2 0.0014925 0 9.677419 slowstart increase high
0.08 2 0.2000000 0.0 0.0029851 0 10.000000 slowstart decrease high
# Render the last five rows as a table.
kable(tail(df, n = 5))
time W R q x p S status S_slope S_slope_rate
2497 49.92 27.41972 0.9893418 23.68025 21.86931 0.0463816 27.71512 ca_steadystate decrease high
2498 49.94 27.25502 0.9878185 23.63456 21.88293 0.0467328 27.59112 ca_steadystate decrease high
2499 49.96 27.08724 0.9862126 23.58638 21.89610 0.0470732 27.46593 ca_steadystate decrease high
2500 49.98 26.91647 0.9845232 23.53570 21.90881 0.0474025 27.33960 ca_steadystate decrease high
2501 50.00 26.74279 0.9827496 23.48249 21.92104 0.0477203 27.21221 ca_steadystate decrease high

Below are summaries of the TCP variables W(t),R(t),q(t),x(q(t)),p(x(t)),S(t), conditioned on the TCP status value: {slow start, congestion avoidance transient state, congestion avoidance steady state}.

# Summary statistics of W for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(W ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], W = a[["W"]]))
status W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate 10.77 17.18 22.920 22.120 27.47 30.86
ca_transient 10.59 18.02 23.270 22.600 27.59 30.97
slowstart 2.00 2.00 3.774 4.528 6.25 10.54
# Box plot of W per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = W)) +
  geom_boxplot(mapping = aes(fill = status))

# Summary statistics of R for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(R ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], R = a[["R"]]))
status R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate 0.3949 0.5704 0.7477 0.7292 0.8967 1.0040
ca_transient 0.2592 0.5993 0.7725 0.7423 0.9089 1.0080
slowstart 0.2000 0.2000 0.2067 0.2109 0.2175 0.2512
# Box plot of R per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = R)) +
  geom_boxplot(mapping = aes(fill = status))

# Summary statistics of q for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(q ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], q = a[["q"]]))
status q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate 5.848 11.11 16.43 15.8800 20.9000 24.130
ca_transient 1.775 11.98 17.18 16.2700 21.2700 24.240
slowstart 0.000 0.00 0.20 0.3257 0.5246 1.536
# Box plot of q per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = q)) +
  geom_boxplot(mapping = aes(fill = status))

# Summary statistics of x for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(x ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], x = a[["x"]]))
status x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate 11.03000 12.340000 15.40000 15.79000 18.99000 22.04000
ca_transient 0.08942 7.205000 13.81000 12.91000 19.09000 22.10000
slowstart 0.00000 0.006562 0.01458 0.02079 0.02612 0.07846
# Box plot of x per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = x)) +
  geom_boxplot(mapping = aes(fill = status))

From the last two tables above, the average queue length and its average EWMA are both less than the RED minimum threshold, which is equal to 20.

# Summary statistics of p for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(p ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], p = a[["p"]]))
status p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate 0 0 0 0.004996 0 0.05110
ca_transient 0 0 0 0.006827 0 0.05254
slowstart 0 0 0 0.000000 0 0.00000
# Box plot of p per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = p)) +
  geom_boxplot(mapping = aes(fill = status))

# Summary statistics of S for each status value. Rebuilding the result with
# data.frame() spreads the summary matrix into one column per statistic.
a <- aggregate(S ~ status, data = df, FUN = summary)
kable(data.frame(status = a[["status"]], S = a[["S"]]))
status S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate 20.110 31.00 31.17 30.30 31.47 31.96
ca_transient 20.110 31.03 31.22 30.73 31.58 40.86
slowstart 9.677 10.00 18.66 20.95 29.15 41.95
# Box plot of S per status, with boxes filled by status.
ggplot(data = df, mapping = aes(x = status, y = S)) +
  geom_boxplot(mapping = aes(fill = status))

From the last table above, the average TCP transmission rate stays close to the link capacity. Moreover, the dispersion of the values of S is far smaller than that of W, q and R.

Below are summaries of the TCP variables W(t),R(t),q(t),x(q(t)),p(x(t)),S(t), conditioned on both the status and the TCP transmission rate slope {decrease, increase}.

# Summary statistics of W for each (status, S_slope) combination.
a <- aggregate(W ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  W = a[["W"]]
))
status S_slope W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate decrease 13.59 19.950 24.390 23.850 28.050 30.860
ca_transient decrease 10.59 18.020 23.270 22.600 27.590 30.970
slowstart decrease 2.00 2.000 2.000 3.827 4.381 10.540
ca_steadystate increase 10.77 11.360 12.210 12.340 13.210 14.610
slowstart increase 2.00 2.444 4.068 4.739 6.711 9.963
# Summary statistics of R for each (status, S_slope) combination.
a <- aggregate(R ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  R = a[["R"]]
))
status S_slope R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate decrease 0.4502 0.6512 0.7957 0.7773 0.9169 1.0040
ca_transient decrease 0.2592 0.5993 0.7725 0.7423 0.9089 1.0080
slowstart decrease 0.2000 0.2000 0.2000 0.2057 0.2000 0.2512
ca_steadystate increase 0.3949 0.4075 0.4298 0.4577 0.4771 0.6625
slowstart increase 0.2000 0.2054 0.2085 0.2124 0.2181 0.2440
# Summary statistics of q for each (status, S_slope) combination.
a <- aggregate(q ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  q = a[["q"]]
))
status S_slope q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate decrease 7.506 13.540 17.870 17.3200 21.510 24.130
ca_transient decrease 1.775 11.980 17.180 16.2700 21.270 24.240
slowstart decrease 0.000 0.000 0.000 0.1707 0.000 1.536
ca_steadystate increase 5.848 6.226 6.893 7.7300 8.312 13.880
slowstart increase 0.000 0.163 0.254 0.3722 0.542 1.320
# Summary statistics of x for each (status, S_slope) combination.
a <- aggregate(x ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  x = a[["x"]]
))
status S_slope x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate decrease 11.03000 12.010000 15.130000 15.66000 19.06000 22.04000
ca_transient decrease 0.08942 7.205000 13.810000 12.91000 19.09000 22.10000
slowstart decrease 0.00000 0.002985 0.005882 0.01671 0.01831 0.07846
ca_steadystate increase 12.71000 14.180000 16.280000 16.49000 18.77000 20.81000
slowstart increase 0.00000 0.008643 0.015950 0.02201 0.03093 0.06906
# Summary statistics of p for each (status, S_slope) combination.
a <- aggregate(p ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  p = a[["p"]]
))
status S_slope p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate decrease 0 0 0 0.005630 0 0.05110
ca_transient decrease 0 0 0 0.006827 0 0.05254
slowstart decrease 0 0 0 0.000000 0 0.00000
ca_steadystate increase 0 0 0 0.001416 0 0.02166
slowstart increase 0 0 0 0.000000 0 0.00000
# Summary statistics of S for each (status, S_slope) combination.
a <- aggregate(S ~ (status + S_slope), data = df, FUN = summary)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S = a[["S"]]
))
status S_slope S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate decrease 20.320 31.03 31.19 30.79 31.46 31.96
ca_transient decrease 20.110 31.03 31.22 30.73 31.58 40.86
slowstart decrease 10.000 10.00 10.00 17.94 21.90 41.95
ca_steadystate increase 20.110 22.92 29.42 27.51 31.65 31.96
slowstart increase 9.677 12.09 19.50 21.85 30.87 40.83

Below are summaries of the TCP variables W(t),R(t),q(t),x(q(t)),p(x(t)),S(t), conditioned on status, slope and slope rate level {high, low}.

# Summary statistics of W for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  W ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  W = a[["W"]]
))
status S_slope S_slope_rate W.Min. W.1st.Qu. W.Median W.Mean W.3rd.Qu. W.Max.
ca_steadystate decrease high 13.87 21.310 27.740 25.510 30.120 30.860
ca_transient decrease high 10.59 12.910 17.290 19.450 26.730 30.970
slowstart decrease high 2.00 2.000 2.000 3.827 4.381 10.540
ca_steadystate increase high 10.77 11.240 11.870 11.990 12.690 13.760
slowstart increase high 2.00 2.444 4.068 4.739 6.711 9.963
ca_steadystate decrease low 13.59 19.870 24.080 23.630 27.670 30.830
ca_transient decrease low 13.32 19.600 23.980 23.490 27.680 30.950
ca_steadystate increase low 13.05 13.840 14.060 14.020 14.270 14.610
# Summary statistics of R for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  R ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  R = a[["R"]]
))
status S_slope S_slope_rate R.Min. R.1st.Qu. R.Median R.Mean R.3rd.Qu. R.Max.
ca_steadystate decrease high 0.6819 0.9015 0.9921 0.9396 1.0010 1.0040
ca_transient decrease high 0.2592 0.3963 0.8099 0.7081 0.9858 1.0080
slowstart decrease high 0.2000 0.2000 0.2000 0.2057 0.2000 0.2512
ca_steadystate increase high 0.3949 0.4054 0.4208 0.4567 0.4941 0.6496
slowstart increase high 0.2000 0.2054 0.2085 0.2124 0.2181 0.2440
ca_steadystate decrease low 0.4502 0.6315 0.7709 0.7564 0.8901 0.9955
ca_transient decrease low 0.4338 0.6236 0.7675 0.7520 0.8907 0.9992
ca_steadystate increase low 0.4270 0.4362 0.4429 0.4622 0.4496 0.6625
# Summary statistics of q for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  q ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  q = a[["q"]]
))
status S_slope S_slope_rate q.Min. q.1st.Qu. q.Median q.Mean q.3rd.Qu. q.Max.
ca_steadystate decrease high 14.460 21.050 23.760 22.1900 24.040 24.130
ca_transient decrease high 1.775 5.888 18.300 15.2400 23.580 24.240
slowstart decrease high 0.000 0.000 0.000 0.1707 0.000 1.536
ca_steadystate increase high 5.848 6.161 6.625 7.7020 8.823 13.490
slowstart increase high 0.000 0.163 0.254 0.3722 0.542 1.320
ca_steadystate decrease low 7.506 12.950 17.130 16.6900 20.700 23.860
ca_transient decrease low 7.013 12.710 17.030 16.5600 20.720 23.980
ca_steadystate increase low 6.810 7.086 7.287 7.8660 7.487 13.880
# Summary statistics of x for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  x ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  x = a[["x"]]
))
status S_slope S_slope_rate x.Min. x.1st.Qu. x.Median x.Mean x.3rd.Qu. x.Max.
ca_steadystate decrease high 20.95000 21.380000 21.680000 21.64000 21.91000 22.04000
ca_transient decrease high 0.08942 1.333000 21.380000 13.87000 21.90000 22.10000
slowstart decrease high 0.00000 0.002985 0.005882 0.01671 0.01831 0.07846
ca_steadystate increase high 13.52000 15.050000 16.980000 17.04000 19.02000 20.71000
slowstart increase high 0.00000 0.008643 0.015950 0.02201 0.03093 0.06906
ca_steadystate decrease low 11.03000 11.820000 14.240000 14.89000 17.72000 21.07000
ca_transient decrease low 2.17400 8.040000 13.170000 12.63000 17.47000 21.10000
ca_steadystate increase low 12.71000 12.910000 13.130000 13.83000 13.37000 20.81000
# Summary statistics of p for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  p ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  p = a[["p"]]
))
status S_slope S_slope_rate p.Min. p.1st.Qu. p.Median p.Mean p.3rd.Qu. p.Max.
ca_steadystate decrease high 0.02506 0.0345 0.04183 0.040800 0.04751 0.05110
ca_transient decrease high 0.00000 0.0000 0.03441 0.026770 0.04753 0.05254
slowstart decrease high 0.00000 0.0000 0.00000 0.000000 0.00000 0.00000
ca_steadystate increase high 0.00000 0.0000 0.00000 0.001299 0.00000 0.01901
slowstart increase high 0.00000 0.0000 0.00000 0.000000 0.00000 0.00000
ca_steadystate decrease low 0.00000 0.0000 0.00000 0.001106 0.00000 0.02631
ca_transient decrease low 0.00000 0.0000 0.00000 0.001138 0.00000 0.02701
ca_steadystate increase low 0.00000 0.0000 0.00000 0.001989 0.00000 0.02166
# Summary statistics of S for each (status, S_slope, S_slope_rate) combination.
a <- aggregate(
  S ~ (status + S_slope + S_slope_rate),
  data = df,
  FUN = summary
)
kable(data.frame(
  status = a[["status"]],
  S_slope = a[["S_slope"]],
  S_slope_rate = a[["S_slope_rate"]],
  S = a[["S"]]
))
status S_slope S_slope_rate S.Min. S.1st.Qu. S.Median S.Mean S.3rd.Qu. S.Max.
ca_steadystate decrease high 20.340 23.64 27.96 26.83 30.00 30.97
ca_transient decrease high 20.120 24.20 29.81 28.71 32.57 40.86
slowstart decrease high 10.000 10.00 10.00 17.94 21.90 41.95
ca_steadystate increase high 20.120 22.33 27.97 26.84 31.05 31.88
slowstart increase high 9.677 12.09 19.50 21.85 30.87 40.83
ca_steadystate decrease low 20.320 31.08 31.24 31.30 31.50 31.96
ca_transient decrease low 20.110 31.08 31.25 31.30 31.52 32.20
ca_steadystate increase low 20.110 31.89 31.92 30.80 31.93 31.96

In the next post I will show phase diagrams that shed some light on the strengths and weaknesses of TCP performance.