# R Code for Grouped Graphs Swirl Lesson

## Load Packages and Import Data

library(ggplot2)  ##for graphing

library(dplyr) ## for filtering, summarising, and other data wrangling

# Import the hemlock data file.
# NOTE(review): the original statement was truncated to its `col_types`
# argument (with an unbalanced closing parenthesis), so the reader call and
# the file path were lost. `read_csv()` and "hemlock.csv" are assumptions --
# TODO confirm the real file path before running.
hemlock <- readr::read_csv(
  "hemlock.csv",
  col_types = readr::cols(
    # Sampling dates are written as month/day/four-digit year.
    SamplingDate = readr::col_date(format = "%m/%d/%Y"),
    # Location is the grouping variable for every plot below.
    Location = readr::col_factor()
  )
)

## Bar Graph of Means and Standard Errors

## Calculate descriptive stats and SE for EHS density at each location
EHS.sum <- hemlock %>%
  group_by(Location) %>%
  summarise(
    # Drop missing EHS values so one NA does not turn a whole location's
    # mean/sd into NA (the box plots below also use na.rm = TRUE).
    mean = mean(EHS, na.rm = TRUE),
    sd = sd(EHS, na.rm = TRUE),
    # Count only the observations that actually contributed to mean/sd, so
    # the standard error uses the right denominator.
    n = sum(!is.na(EHS))
  ) %>%
  # Standard error of the mean = sd / sqrt(sample size)
  mutate(sem = sd / sqrt(n))

print(EHS.sum)

## Bar plot with mean and SE
# One bar per location (height = mean EHS density) with error bars spanning
# mean - SEM to mean + SEM. Reads the summary columns `mean` and `sem` from
# EHS.sum.
# The stray unnamed `fill` that was inside aes() is removed: it named no
# aesthetic and referred to no column; the bar colour is the constant
# `fill = "steelblue"` set on the bar layer.
g.bar <- ggplot(EHS.sum, aes(x = Location, y = mean)) +
  # geom_col() draws bars whose heights are the y values themselves
  # (equivalent to geom_bar(stat = "identity")).
  geom_col(width = 0.5, show.legend = FALSE, fill = "steelblue") +
  geom_errorbar(aes(ymin = mean - sem, ymax = mean + sem), width = 0.1, size = 1) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  # expand = FALSE removes the axis padding; the x limits leave half a bar
  # slot on each side of the 4 locations.
  coord_cartesian(xlim = c(0.5, 4.5), expand = FALSE) +
  theme_classic(base_size = 16)

print(g.bar)

## Histograms

# A single-location histogram needs the data subset first: keep only the
# rows recorded at the FLH location.
FLH.data <- filter(hemlock, Location == "FLH")

# Histogram of EHS density at the FLH location (bins 0.4 wide)
FLH.hist <- ggplot(FLH.data, mapping = aes(x = EHS)) +
  geom_histogram(
    binwidth = 0.4,
    fill = "steelblue",
    color = "white",
    show.legend = FALSE
  ) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)

print(FLH.hist)

# Histograms of EHS density for every location, stacked as one facet row per
# location and filled by location (legend hidden; the facet strips already
# label each panel).
g.hist <- ggplot(hemlock, mapping = aes(x = EHS, fill = Location)) +
  geom_histogram(
    binwidth = 0.4,
    color = "white",
    show.legend = FALSE
  ) +
  facet_grid(rows = vars(Location)) +
  labs(x = "EHS Density (insects/cm)", y = "Count") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 14)

print(g.hist)

## Box Plots

# Box plot of EHS density by location. Outliers are drawn as points and each
# location's mean is overlaid as a black point of shape 4.
EHS.box <- ggplot(hemlock, mapping = aes(x = Location, y = EHS, color = Location)) +
  # Horizontal caps on the whisker ends
  stat_boxplot(
    geom = "errorbar",
    width = 0.1,
    lwd = 0.75,
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  geom_boxplot(
    width = 0.5,
    lwd = 0.75,
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  # Mean marker for each location
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 4,
    size = 2,
    stroke = 2,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(ylim = c(0, 11.5), expand = TRUE) +
  theme_classic(base_size = 20)

print(EHS.box)

# Box plot of EHS density by location with every observation shown as a
# jittered point underneath; the boxes' own outlier points are suppressed
# (outlier.shape = NA) and each mean is overlaid as a black point of shape 4.
EHS.box2 <- ggplot(hemlock, mapping = aes(x = Location, y = EHS)) +
  # Raw observations; x and y are inherited from the plot-level mapping
  geom_point(
    mapping = aes(fill = Location),
    shape = 21,
    size = 1,
    alpha = 0.5,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  # Horizontal caps on the whisker ends
  stat_boxplot(geom = "errorbar", width = 0.1, lwd = 0.75, na.rm = TRUE) +
  # Nearly transparent boxes so the points stay visible
  geom_boxplot(
    width = 0.5,
    lwd = 0.75,
    alpha = 0.1,
    outlier.shape = NA,
    na.rm = TRUE
  ) +
  # Mean marker for each location
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 4,
    size = 2,
    stroke = 2,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20)

print(EHS.box2)

## Violin Plot

# Sample size: number of rows per location
sample_size <- hemlock %>%
  group_by(Location) %>%
  summarize(num = n())

# Violin plot with box plots and sample sizes
EHS.violin <- hemlock %>%
  # Name the join key explicitly so the join does not guess it (and does not
  # print a "Joining with ..." message).
  left_join(sample_size, by = "Location") %>%
  mutate(
    # Axis label: location name with its sample size on a second line
    myaxis = paste0(Location, "\n", "n=", num),
    # Keep the labels in the same order as Location; a plain character
    # column would be sorted alphabetically on the axis.
    myaxis = factor(myaxis, levels = unique(myaxis[order(Location)]))
  ) %>%
  ggplot(aes(x = myaxis, y = EHS, fill = Location)) +
  geom_violin(width = 1.4) +
  # Narrow box plot drawn inside each violin
  geom_boxplot(width = 0.1, color = "black", alpha = 0.8) +
  ylab("EHS Density (insects/cm)") +
  xlab("Location") +
  theme_classic(base_size = 18) +
  # The x axis already identifies each location, so hide the fill legend
  theme(legend.position = "none")

print(EHS.violin)

## Dot Plots

# Filter for one semester (Winter 2019)
# The `$` operators had been escaped as `\$`, which R cannot parse.
# Make sure SamplingDate is a Date before comparing; text values are read as
# month/day/four-digit year.
hemlock$SamplingDate <- as.Date(hemlock$SamplingDate, "%m/%d/%Y")

# Keep only the samples taken after 20 November 2018
small <- hemlock %>% filter(SamplingDate > as.Date("2018-11-20"))

# Dot plot of EHS density by location for the filtered data (`small`), with
# a black horizontal bar at each location's mean.
dot.mean <- ggplot(small, mapping = aes(x = Location, y = EHS)) +
  # Individual observations, jittered; x and y are inherited from the
  # plot-level mapping
  geom_point(
    mapping = aes(fill = Location),
    shape = 21,
    size = 3,
    alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  # Using the same function for min, middle and max collapses the crossbar
  # into a single line at the mean
  stat_summary(
    fun = mean,
    fun.min = mean,
    fun.max = mean,
    geom = "crossbar",
    width = 0.5,
    size = 0.75,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")

print(dot.mean)

# Dot plot of EHS density by location for the filtered data (`small`), with
# a black horizontal bar at each location's median.
dot.median <- ggplot(small, mapping = aes(x = Location, y = EHS)) +
  # Individual observations, jittered; x and y are inherited from the
  # plot-level mapping
  geom_point(
    mapping = aes(fill = Location),
    shape = 21,
    size = 3,
    alpha = 0.75,
    position = position_jitterdodge(jitter.width = 0.8),
    show.legend = FALSE
  ) +
  # Using the same function for min, middle and max collapses the crossbar
  # into a single line at the median
  stat_summary(
    fun = median,
    fun.min = median,
    fun.max = median,
    geom = "crossbar",
    width = 0.5,
    size = 0.75,
    colour = "black",
    na.rm = TRUE,
    show.legend = FALSE
  ) +
  labs(x = "Location", y = "EHS Density (insects/cm)") +
  coord_cartesian(expand = TRUE) +
  theme_classic(base_size = 20) +
  theme(legend.position = "none")

print(dot.median)