-- command line execution of r scripts:

R CMD BATCH test.r

-- useful r commands:

# get help (e.g.):
> help("write.table")

# write data to file:
> write.table(sqd, file="test.dat", col.names = FALSE, quote=FALSE, row.names=FALSE)

# read data from file:
> rtd <- read.table("uf100-0239-ws55-rtd.dat")

> median(rtd$V2)

> summary(rtd)
       V1               V2              V3
 Min.   :0.0010   Min.   :   95   Min.   :0.0001115
 1st Qu.:0.2507   1st Qu.: 3276   1st Qu.:0.0038440
 Median :0.5005   Median : 8318   Median :0.0097611
 Mean   :0.5005   Mean   :12995   Mean   :0.0152500
 3rd Qu.:0.7502   3rd Qu.:18308   3rd Qu.:0.0214859
 Max.   :1.0000   Max.   :91660   Max.   :0.1075688

# produce histogram of column V2:
> hist(rtd$V2)

# plot cdf:
> library(stepfun)
> plot(ecdf(rtd$V2))

# qq plot against std normal:
> qqnorm(rtd$V2); qqline(rtd$V2)

# wilcoxon rank sum test (compare rtds) = mann-whitney u-test:
> library(ctest)
> wilcox.test(rtd$V2,rtd40$V2,paired=FALSE)

        Wilcoxon rank sum test with continuity correction

data:  rtd$V2 and rtd40$V2
W = 440056, p-value = 3.45e-06
alternative hypothesis: true mu is not equal to 0

# -> reject null hyp (null hyp = med are equal) -> med are not equal

# kolmogorov-smirnov test:
> ks.test(rtd$V2,rtd50$V2)

        Two-sample Kolmogorov-Smirnov test

data:  rtd$V2 and rtd50$V2
D = 0.029, p-value = 0.7944
alternative hypothesis: two.sided

Warning message:
cannot compute correct p-values with ties in: ks.test(rtd$V2, rtd50$V2)

# -> do not reject null hyp (distr are equal)

# kendall's tau test:
> corr <- read.table("flat100-corr-nov+.dat")   # xxx
> cor.test(corr$V1,corr$V2, method="kendall")

        Kendall's rank correlation tau

data:  corr$V1 and corr$V2
z.tau = 12.9965, p-value = < 2.2e-16
alternative hypothesis: true tau is not equal to 0
sample estimates:
      tau
0.8816162

# -> reject null hyp (no correlation between data)

# spearman's rank order test (alt to above):
> cor.test(corr$V1,corr$V2, method="spear")

# wilcoxon matched pairs signed-rank test:
> wilcox.test(corr$V1,corr$V2, paired=TRUE)

        Wilcoxon signed rank test with continuity
correction

data:  corr$V1 and corr$V2
V = 3919, p-value = 1.657e-06
alternative hypothesis: true mu is not equal to 0

# -> reject null hyp (no sign perf diff)

# kolmogorov-smirnov test against exp distr:
> ks.test(rtd$V2, pexp, 1/mean(rtd$V2))
> ks.test(rtd$V2, pexp, log(2)/29.4)

# note: chisq.test is _not_ the goodness of fit test!

# qqplot of rtd vs. simple exp approx:
> qqplot(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)))
> qqplot(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)),log="xy")

> rtd <- read.table("ihlk-restart-output-1000-7-rtd.dat")
> qqplot(rtd$V2,qexp(1:500/500,log(2)/29.4))

# combine columns into table (array):
> qq <- cbind(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)))

# write 2-dim table (array) to file:
> write (t(qq), file="qq.dat", ncolumns=2)

# count number of inst for which alg A > alg B:
> table(corr$V1 > corr$V2)

# compute correlation of vectors x,y:
> cor(x,y)

# test distribution for normality:
> shapiro.test(x)

        Shapiro-Wilk normality test

[p-value < alpha: the null hypothesis (data are normally distributed) is rejected]