Basic ggplot2

library(tidyverse)
library(Hmisc) #For summary functions
library(xtable) #For printing the statistics tables

1 Data Import and Reshaping

# Load the example measurements; the path is relative to the working directory.
rawdata <- read.csv(file = "data_input/Example01.csv")

The initial data are in wide format, but we need them in long format. For this transformation we can use reshape():

# Stack the four Bio.* columns into a single Biofilm column; Experiment
# records which of the four source columns each value came from.
longdata <- reshape(
  rawdata,
  direction = "long",
  varying = list(c("Bio.1", "Bio.2", "Bio.3", "Bio.4")),
  v.names = "Biofilm",
  timevar = "Experiment"
)

# reshape() numbers the original rows in `id`; turn that into a labelled
# factor. Labels are matched to row order in the input, so they assume the
# rows of the wide table come in exactly this order.
longdata[["id"]] <- factor(
  longdata[["id"]],
  labels = c(
    "WT_TSB", "MutA_TSB", "MutB_TSB", "MutAB_TSB",
    "WT_CDM", "MutA_CDM", "MutB_CDM", "MutAB_CDM"
  )
)

2 Basic Plots

2.1 Scatterplots

# Data only: no aesthetics and no layers yet, so nothing is drawn.
ggplot(data = longdata)

# One point per measurement, grouped on the x axis by Strain.
ggplot(data = longdata, mapping = aes(x = Strain, y = Biofilm)) +
  geom_point()

# Same points, coloured by the Media column.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  geom_point()

# Boxplots instead of points, one box per Strain/Media combination.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  geom_boxplot()

# Boxplots with the raw points overlaid; the points are dodged by 0.75 so
# they line up with the boxes of their own Media group.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  geom_boxplot() +
  geom_point(alpha = 0.5, position = position_dodge(width = 0.75))

# Semi-transparent raw points plus a larger point marking each group mean.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  geom_point(alpha = 0.5) +
  stat_summary(geom = "point", fun = "mean", size = 4)

# Group means only.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  stat_summary(geom = "point", fun = "mean", size = 4)

2.2 Bar plots

# Group means drawn as bars. Media is mapped to `color` (the bar outline)
# and no dodging is requested, so the bars of both media share one x position.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, color = Media)
) +
  stat_summary(geom = "bar", fun = "mean")

# Map Media to `fill` and dodge the bars so the two media sit side by side.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, fill = Media)
) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge())

# Add error bars, dodged by 0.9 to align with the bars.
# Note: the bars show the mean, while "median_hilow" is median-based.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, fill = Media)
) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge()) +
  stat_summary(
    geom = "errorbar",
    fun.data = "median_hilow",
    position = position_dodge(width = 0.9)
  )

# Narrower error bars, added before the bar layer so the bars are drawn on
# top of them.
ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, fill = Media)
) +
  stat_summary(
    geom = "errorbar",
    fun.data = "median_hilow",
    position = position_dodge(width = 0.9),
    width = 0.2
  ) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge())

fun.data = "median_hilow" computes the median together with a lower and an upper quantile. With the default conf.int = 0.95, these are the 0.025 and 0.975 quantiles. See also: "mean_cl_boot" / "mean_sdl".

3 Styling plots

# Baseline figure with default styling: dodged mean bars per Strain/Media,
# with the error-bar layer added first so the bars are drawn over it.
ugly.plot <- ggplot(
  data = longdata,
  mapping = aes(x = Strain, y = Biofilm, fill = Media)
) +
  stat_summary(
    geom = "errorbar",
    fun.data = "median_hilow",
    position = position_dodge(width = 0.9),
    width = 0.2
  ) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge())

# Step 1: switch to the classic theme.
ugly.plot +
  theme_classic()

# Step 2: replace the default fill colours with two shades of gray.
ugly.plot +
  theme_classic() +
  scale_fill_manual(values = c("gray80", "gray40"))

# Step 3: remove the y-axis expansion so the bars start on the x axis.
ugly.plot +
  theme_classic() +
  scale_fill_manual(values = c("gray80", "gray40")) +
  scale_y_continuous(expand = c(0, 0))

# Step 4: set the y-axis title and blank out the x-axis title.
ugly.plot +
  theme_classic() +
  scale_fill_manual(values = c("gray80", "gray40")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "", y = "Biofilm(OD600)")

4 Facets

# Same styled figure, split into one panel per level of Media.
ugly.plot +
  theme_classic() +
  scale_fill_manual(values = c("gray80", "gray40")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "", y = "Biofilm(OD600)") +
  facet_wrap(facets = ~ Media)

5 Statistics

5.1 One-way ANOVA

# One-way ANOVA of Biofilm across the conditions encoded in `id`.
ANOVA <- aov(Biofilm ~ id, data = longdata)
# The assignment alone prints nothing; summary() produces the ANOVA table.
summary(ANOVA)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## id           7 0.3844 0.05492   440.9 <2e-16 ***
## Residuals   24 0.0030 0.00012                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

RESULT: There is a statistically significant difference between these conditions, with a p-value of $9.2101939 \times 10^{-24}$.

5.2 Two-way ANOVA

# Two-way ANOVA with additive effects of Strain and Media
# (the formula has no Strain:Media interaction term).
ANOVA2 <- aov(Biofilm ~ Strain + Media, data = longdata)
# The assignment alone prints nothing; summary() produces the ANOVA table.
summary(ANOVA2)
##             Df  Sum Sq Mean Sq F value   Pr(>F)    
## Strain       3 0.25319 0.08440   49.07 4.62e-11 ***
## Media        1 0.08778 0.08778   51.03 1.11e-07 ***
## Residuals   27 0.04644 0.00172                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

RESULT: There is a statistically significant difference between strains, with a p-value of $4.619916 \times 10^{-11}$. There is a statistically significant difference between media, with a p-value of $1.1074002 \times 10^{-7}$.

5.3 T-test Pairwise comparisons

Pairwise comparisons with Bonferroni correction

# Pairwise t-tests between all levels of `id`, with Bonferroni-adjusted
# p-values. The argument name and the method are written out in full instead
# of relying on partial matching ("p.adj", "bonf").
BONF <- pairwise.t.test(
  longdata$Biofilm,
  longdata$id,
  p.adjust.method = "bonferroni"
)
# Format the matrix of adjusted p-values with four decimals and emit HTML.
BONF.table <- xtable(BONF$p.value, digits = 4)
print(BONF.table, type = "html")
WT_TSB MutA_TSB MutB_TSB MutAB_TSB WT_CDM MutA_CDM MutB_CDM
MutA_TSB 0.0000
MutB_TSB 0.0732 0.0000
MutAB_TSB 0.0000 0.0000 0.0000
WT_CDM 0.0000 0.0002 0.0000 0.0000
MutA_CDM 0.0000 0.0000 0.0000 0.0001 0.0000
MutB_CDM 0.0000 0.0000 0.0000 0.0001 0.0000 1.0000
MutAB_CDM 0.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000

Pairwise comparisons with Tukey correction

# Tukey honest significant differences for the `id` term of the one-way model.
TUKEY <- TukeyHSD(x = ANOVA, which = "id")
# Per-comparison matrix (diff, lwr, upr, p adj), printed as an HTML table.
TUKEY.table <- xtable(TUKEY[["id"]], digits = 4)
print(TUKEY.table, type = "html")
diff lwr upr p adj
MutA_TSB-WT_TSB -0.1422 -0.1684 -0.1161 0.0000
MutB_TSB-WT_TSB -0.0265 -0.0526 -0.0004 0.0453
MutAB_TSB-WT_TSB -0.2895 -0.3156 -0.2634 0.0000
WT_CDM-WT_TSB -0.0970 -0.1231 -0.0709 0.0000
MutA_CDM-WT_TSB -0.2430 -0.2691 -0.2169 0.0000
MutB_CDM-WT_TSB -0.2410 -0.2671 -0.2149 0.0000
MutAB_CDM-WT_TSB -0.2962 -0.3224 -0.2701 0.0000
MutB_TSB-MutA_TSB 0.1157 0.0896 0.1419 0.0000
MutAB_TSB-MutA_TSB -0.1472 -0.1734 -0.1211 0.0000
WT_CDM-MutA_TSB 0.0453 0.0191 0.0714 0.0002
MutA_CDM-MutA_TSB -0.1008 -0.1269 -0.0746 0.0000
MutB_CDM-MutA_TSB -0.0988 -0.1249 -0.0726 0.0000
MutAB_CDM-MutA_TSB -0.1540 -0.1801 -0.1279 0.0000
MutAB_TSB-MutB_TSB -0.2630 -0.2891 -0.2369 0.0000
WT_CDM-MutB_TSB -0.0705 -0.0966 -0.0444 0.0000
MutA_CDM-MutB_TSB -0.2165 -0.2426 -0.1904 0.0000
MutB_CDM-MutB_TSB -0.2145 -0.2406 -0.1884 0.0000
MutAB_CDM-MutB_TSB -0.2697 -0.2959 -0.2436 0.0000
WT_CDM-MutAB_TSB 0.1925 0.1664 0.2186 0.0000
MutA_CDM-MutAB_TSB 0.0465 0.0204 0.0726 0.0001
MutB_CDM-MutAB_TSB 0.0485 0.0224 0.0746 0.0001
MutAB_CDM-MutAB_TSB -0.0068 -0.0329 0.0194 0.9874
MutA_CDM-WT_CDM -0.1460 -0.1721 -0.1199 0.0000
MutB_CDM-WT_CDM -0.1440 -0.1701 -0.1179 0.0000
MutAB_CDM-WT_CDM -0.1992 -0.2254 -0.1731 0.0000
MutB_CDM-MutA_CDM 0.0020 -0.0241 0.0281 1.0000
MutAB_CDM-MutA_CDM -0.0532 -0.0794 -0.0271 0.0000
MutAB_CDM-MutB_CDM -0.0552 -0.0814 -0.0291 0.0000