# R Code for Grouped Graphs Swirl Lesson

# Load Packages and Import Data ----

library(readr) ##for importing data

library(ggplot2)  ##for graphing

library(dplyr) ## for filtering, summarising, and other data wrangling

# Import the hemlock data set from the published Google Sheets CSV export.
# SamplingDate is parsed as a Date written as month/day/year, and Location is
# read in as a factor.
hemlock <- read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vT-Uo5Gs2dcR6f6_PFrZwkaSrojsBCFt1qvVNU0PXn4RHVe3_GDzNL3BCxkkp6eIhjkfKw3S6YcX6wz/pub?output=csv",
  col_types = cols(
    SamplingDate = col_date(format = "%m/%d/%Y"),
    Location = col_factor()
  )
)

# Bar Graph of Means and Standard Errors ----

## Descriptive statistics of EHS density for each location:
##   mean = group mean, sd = standard deviation, n = number of rows,
##   sem  = standard error of the mean (sd / sqrt(n)).
## NOTE(review): mean() and sd() are called without na.rm, so any location
## with a missing EHS value would get NA here -- confirm the data has none.
EHS.sum <- hemlock %>%
  group_by(Location) %>%
  summarise(
    mean = mean(EHS),
    sd = sd(EHS),
    n = n(),
    sem = sd / sqrt(n)
  )

print(EHS.sum)

## Bar plot of the mean EHS density at each location with +/- 1 SE error bars.
## Only x and y are mapped in aes(); the bar colour is a fixed constant set in
## geom_bar(), so no fill aesthetic is mapped.
g.bar <- ggplot(EHS.sum, aes(x = Location, y = mean)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    show.legend = FALSE,
    fill = "steelblue"
  ) +
  ## error bars run from mean - sem to mean + sem
  geom_errorbar(
    aes(ymin = mean - sem, ymax = mean + sem),
    width = 0.1,
    size = 1
  ) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  ## expand = FALSE removes the padding around the data, so the bars start
  ## right at the x axis; the x limits 0.5-4.5 leave half a unit on either
  ## side of discrete positions 1 to 4 (assumes four locations).
  coord_cartesian(xlim = c(0.5, 4.5), expand = FALSE) +
  theme_classic(base_size = 16)

print(g.bar)

# Histograms ----

## A histogram of a single location needs the data subset to that location
## first; here only the rows where Location is FLH are kept.
FLH.data <- filter(hemlock, Location == "FLH")

## Histogram of EHS density at the FLH location (bins are 0.4 wide)
FLH.hist <- ggplot(FLH.data, aes(x = EHS)) +
  geom_histogram(
    binwidth = 0.4,
    fill = "steelblue",
    color = "white",
    show.legend = FALSE
  ) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)

print(FLH.hist)

## Histograms of the EHS density for every location: one panel per location,
## stacked in rows, each filled with its own colour (legend hidden because the
## panel strips already name the location).
g.hist <- ggplot(hemlock, aes(x = EHS, fill = Location)) +
  geom_histogram(
    binwidth = 0.4,
    color = "white",
    show.legend = FALSE
  ) +
  facet_grid(Location ~ .) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)

print(g.hist)

# Box Plots ----

## Box plot of EHS density by location, coloured by location.
## Layers, bottom to top: whisker end caps (errorbar geom), the boxes with
## outliers drawn as points, and a black X marking each location's mean.
EHS.box <- ggplot(hemlock, aes(x = Location, y = EHS, color = Location)) +
  stat_boxplot(
    geom = "errorbar",
    width = 0.1,
    lwd = 0.75,
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  geom_boxplot(
    width = 0.5,
    lwd = 0.75,
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 4,
    size = 2,
    stroke = 2,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  ## the visible y range is fixed to 0-11.5
  coord_cartesian(ylim = c(0, 11.5), expand = TRUE) +
  theme_classic(base_size = 20)

print(EHS.box)

## Box plot of EHS density by location with every observation shown.
## The raw points are jittered and drawn first; the boxes on top are mostly
## transparent and have their outlier points suppressed (outlier.shape = NA)
## so no observation is plotted twice. A black X marks each mean.
EHS.box2 <- ggplot(hemlock, aes(x = Location, y = EHS)) +
  ## x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21,
    size = 1,
    alpha = 0.5,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  stat_boxplot(
    geom = "errorbar",
    width = 0.1,
    lwd = 0.75,
    na.rm = TRUE
  ) +
  geom_boxplot(
    width = 0.5,
    lwd = 0.75,
    alpha = 0.1,
    outlier.shape = NA,
    na.rm = TRUE
  ) +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 4,
    size = 2,
    stroke = 2,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20)

print(EHS.box2)

# Violin Plot ----

# Sample size: number of rows per location (column `num`), used below to
# build the x-axis labels.
sample_size <- hemlock %>%
  group_by(Location) %>%
  summarize(num = n())

# Violin plot of EHS density by location with a narrow box plot drawn inside
# each violin. Each x-axis label is the location name with its sample size on
# a second line ("<Location>\nn=<num>").
EHS.violin <- hemlock %>%
  # Join on Location explicitly: avoids the "Joining with ..." message and
  # keeps the key fixed even if the two tables later share more column names.
  left_join(sample_size, by = "Location") %>%
  mutate(myaxis = paste0(Location, "\n", "n=", num)) %>%
  ggplot(aes(x = myaxis, y = EHS, fill = Location)) +
  geom_violin(width = 1.4) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.8) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  theme_classic(base_size = 18) +
  # the axis labels already identify each location, so hide the fill legend
  theme(legend.position = "none")

print(EHS.violin)

# Dot Plots ----

# Filter for one semester (Winter 2019): keep only the rows sampled after
# 2018-11-20.
# NOTE(review): SamplingDate is already read as a Date at import, so this
# re-parse presumably leaves it unchanged -- confirm before removing it.
hemlock[["SamplingDate"]] <- as.Date(hemlock[["SamplingDate"]], "%m/%d/%Y")

small <- filter(hemlock, SamplingDate > as.Date("2018-11-20"))

# Dot plot of EHS density by location for the filtered data, with the mean of
# each location drawn as a horizontal black line. fun, fun.min and fun.max
# are all mean, so the crossbar has zero height and shows up as one line.
dot.mean <- ggplot(small, aes(x = Location, y = EHS)) +
  # x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21,
    size = 3,
    alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  stat_summary(
    fun = mean,
    fun.min = mean,
    fun.max = mean,
    geom = "crossbar",
    width = 0.5,
    size = 0.75,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")

print(dot.mean)

# Dot plot of EHS density by location for the filtered data, with the median
# of each location drawn as a horizontal black line. fun, fun.min and fun.max
# are all median, so the crossbar has zero height and shows up as one line.
dot.median <- ggplot(small, aes(x = Location, y = EHS)) +
  # x and y are inherited from the plot; only fill is added for the points
  geom_point(
    aes(fill = Location),
    shape = 21,
    size = 3,
    alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  stat_summary(
    fun = median,
    fun.min = median,
    fun.max = median,
    geom = "crossbar",
    width = 0.5,
    size = 0.75,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")

print(dot.median)