- More important than
qplot
to access full functionality of ggplot2
- Main arguments
- data set, usually a
data.frame
or tibble
- aesthetic mappings provided by
aes
function
- General
ggplot
syntax
ggplot(data, aes(...)) + geom() + ... + stat() + ...
- Layer specifications
geom(mapping, data, ..., geom, position)
stat(mapping, data, ..., stat, position)
- Additional components
aes()
mappings can be passed on to all components (ggplot, geom
, etc.). Effects are global when passed on to ggplot()
and local for other components.
x, y
color
: grouping vector (factor)
group
: grouping vector (factor)
Changing Plotting Themes in ggplot
- Theme settings can be accessed with
theme_get()
- Their settings can be changed with
theme()
Example how to change background color to white
... + theme(panel.background=element_rect(fill = "white", colour = "black"))
Storing ggplot
Specifications
Plots and layers can be stored in variables
p <- ggplot(dsmall, aes(carat, price)) + geom_point()
p # or print(p)
Returns information about data and aesthetic mappings followed by each layer
summary(p)
Store a custom regression line layer in a variable and add it to the plot
# Reusable regression-line layer: semi-transparent steel-blue linear fit without
# a confidence band. Uses `FALSE` instead of the reassignable `F`, and
# `linewidth` instead of `size`, which is deprecated for lines since ggplot2 3.4.0.
bestfit <- geom_smooth(method = "lm", se = FALSE, color = alpha("steelblue", 0.5), linewidth = 2)
p + bestfit # Plot with custom regression line
Syntax to pass on other data sets
p %+% diamonds[sample(nrow(diamonds), 100),]
Saves plot stored in variable p
to file
# File name goes first and the plot is passed by name, matching
# ggsave(filename, plot, ...); avoids relying on `file` partially matching `filename`.
ggsave("myplot.pdf", plot = p)
Standard R export functions for graphics work as well (see here).
ggplot
: scatter plots
Basic example
set.seed(1410)
dsmall <- as.data.frame(diamonds[sample(nrow(diamonds), 1000), ])
p <- ggplot(dsmall, aes(carat, price, color=color)) +
geom_point(size=4)
print(p)
Interactive version of above plot can be generated with the ggplotly
function from the plotly
package.
library(plotly)
ggplotly(p)
Regression line
p <- ggplot(dsmall, aes(carat, price)) + geom_point() +
geom_smooth(method="lm", se=FALSE) +
theme(panel.background=element_rect(fill = "white", colour = "black"))
print(p)
## `geom_smooth()` using formula = 'y ~ x'
Several regression lines
p <- ggplot(dsmall, aes(carat, price, group=color)) +
geom_point(aes(color=color), size=2) +
geom_smooth(aes(color=color), method = "lm", se=FALSE)
print(p)
## `geom_smooth()` using formula = 'y ~ x'
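Smoothed curve (geom_smooth defaults to loess only for fewer than 1,000 observations; with the 1,000 rows of dsmall a GAM is fitted instead, as the message below shows)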
p <- ggplot(dsmall, aes(carat, price)) + geom_point() + geom_smooth()
print(p) # Setting se=FALSE removes error shade
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot
: line plot
p <- ggplot(iris, aes(Petal.Length, Petal.Width, group=Species,
color=Species)) + geom_line()
print(p)
Faceting
# Line plot of sepal width vs. sepal length, colored by species and split into
# one facet panel per species (stacked in a single column).
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
p <- ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_line(aes(color = Species), linewidth = 1) +
  facet_wrap(~Species, ncol = 1)
print(p)
Exercise 3
Scatter plots with ggplot2
- Task 1: Generate scatter plot for first two columns in
iris
data frame and color dots by its Species
column.
- Task 2: Use the
xlim
and ylim
arguments to set limits on the x- and y-axes so that all data points are restricted to the left bottom quadrant of the plot.
- Task 3: Generate corresponding line plot with faceting presenting the individual data sets in separate plots.
Structure of iris
data set
class(iris)
## [1] "data.frame"
iris[1:4,]
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
Bar Plots
Sample Set: the following transforms the iris
data set into a ggplot2-friendly format.
Calculate mean values for aggregates given by Species
column in iris
data set
iris_mean <- aggregate(iris[,1:4], by=list(Species=iris$Species), FUN=mean)
Calculate standard deviations for aggregates given by Species
column in iris
data set
iris_sd <- aggregate(iris[,1:4], by=list(Species=iris$Species), FUN=sd)
Reformat iris_mean
with melt
from wide to long form as expected by ggplot2
. Newer alternatives for restructuring data.frames
and tibbles
from wide into long form use the gather
and pivot_longer
functions defined by the tidyr
package. Their usage is shown below as well. The functions pivot_longer
and pivot_wider
are expected to provide the most flexible long-term solution, but may not work in older R versions.
library(reshape2) # Defines melt function
df_mean <- melt(iris_mean, id.vars=c("Species"), variable.name = "Samples", value.name="Values")
df_mean2 <- tidyr::gather(iris_mean, !Species, key = "Samples", value = "Values")
df_mean3 <- tidyr::pivot_longer(iris_mean, !Species, names_to="Samples", values_to="Values")
Reformat iris_sd
with melt
df_sd <- melt(iris_sd, id.vars=c("Species"), variable.name = "Samples", value.name="Values")
Define standard deviation limits
limits <- aes(ymax = df_mean[,"Values"] + df_sd[,"Values"], ymin=df_mean[,"Values"] - df_sd[,"Values"])
Vertical orientation
p <- ggplot(df_mean, aes(Samples, Values, fill = Species)) +
geom_bar(position="dodge", stat="identity")
print(p)
To enforce that the bars are plotted in the order specified in the input data, one can instruct ggplot
to do so by turning the corresponding column (here Species
) into an ordered factor as follows.
df_mean$Species <- factor(df_mean$Species, levels=unique(df_mean$Species), ordered=TRUE)
In the above example this is not necessary since ggplot
uses this order already.
Horizontal orientation
p <- ggplot(df_mean, aes(Samples, Values, fill = Species)) +
geom_bar(position="dodge", stat="identity") + coord_flip() +
theme(axis.text.y=element_text(angle=0, hjust=1))
print(p)
Faceting
p <- ggplot(df_mean, aes(Samples, Values)) + geom_bar(aes(fill = Species), stat="identity") +
facet_wrap(~Species, ncol=1)
print(p)
Error bars
p <- ggplot(df_mean, aes(Samples, Values, fill = Species)) +
geom_bar(position="dodge", stat="identity") +
geom_errorbar(limits, position="dodge")
print(p)
Mirrored
df <- data.frame(group = rep(c("Above", "Below"), each=10), x = rep(1:10, 2), y = c(runif(10, 0, 1), runif(10, -1, 0)))
p <- ggplot(df, aes(x=x, y=y, fill=group)) +
geom_col()
print(p)
Changing Color Settings
library(RColorBrewer)
# display.brewer.all()
p <- ggplot(df_mean, aes(Samples, Values, fill=Species, color=Species)) +
geom_bar(position="dodge", stat="identity") + geom_errorbar(limits, position="dodge") +
scale_fill_brewer(palette="Blues") + scale_color_brewer(palette = "Greys")
print(p)
Using standard R color theme
p <- ggplot(df_mean, aes(Samples, Values, fill=Species, color=Species)) +
geom_bar(position="dodge", stat="identity") + geom_errorbar(limits, position="dodge") +
scale_fill_manual(values=c("red", "green3", "blue")) +
scale_color_manual(values=c("red", "green3", "blue"))
print(p)
Exercise 4
Bar plots
- Task 1: Calculate the mean values for the
Species
components of the first four columns in the iris
data set. Use the melt
function from the reshape2
package to bring the data into the expected format for ggplot
.
- Task 2: Generate two bar plots: one with stacked bars and one with horizontally arranged bars.
Structure of iris data set
class(iris)
## [1] "data.frame"
iris[1:4,]
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
Data reformatting example
Here for line plot
# Simulate a 100 x 5 matrix of normal random values with row names g1..g100
# and column names Sample1..Sample5
y <- matrix(rnorm(500), 100, 5, dimnames = list(paste0("g", 1:100), paste0("Sample", 1:5)))
# Prepend a running Position index; seq_len(nrow(y)) is also safe for zero rows
y <- data.frame(Position = seq_len(nrow(y)), y)
y[1:4, ] # First rows of input format expected by melt()
df <- melt(y, id.vars=c("Position"), variable.name = "Samples", value.name="Values")
p <- ggplot(df, aes(Position, Values)) + geom_line(aes(color=Samples)) + facet_wrap(~Samples, ncol=1)
print(p)
Same data can be represented in box plot as follows
ggplot(df, aes(Samples, Values, fill=Samples)) + geom_boxplot() + geom_jitter(color="darkgrey")
Jitter Plots
p <- ggplot(dsmall, aes(color, price/carat)) +
geom_jitter(alpha = I(1 / 2), aes(color=color))
print(p)
Box plots
p <- ggplot(dsmall, aes(color, price/carat, fill=color)) + geom_boxplot()
print(p)
Violin plots
p <- ggplot(dsmall, aes(color, price/carat, fill=color)) + geom_violin()
print(p)
Same violin plot as interactive plot generated with ggplotly
, where the actual data points are shown as well by including geom_jitter()
.
p <- ggplot(dsmall, aes(color, price/carat, fill=color)) + geom_violin() + geom_jitter(aes(color=color))
ggplotly(p)
Density plots
Line coloring
p <- ggplot(dsmall, aes(carat)) + geom_density(aes(color = color))
print(p)
Area coloring
p <- ggplot(dsmall, aes(carat)) + geom_density(aes(fill = color))
print(p)
Histograms
# Density-scaled histogram of sepal width with bars shaded by bin count and a
# density curve drawn on top. after_stat() replaces the `..var..` notation,
# which is deprecated since ggplot2 3.4.0.
p <- ggplot(iris, aes(x = Sepal.Width)) +
  geom_histogram(aes(y = after_stat(density), fill = after_stat(count)), binwidth = 0.2) +
  geom_density()
print(p)
Pie Chart
df <- data.frame(variable=rep(c("cat", "mouse", "dog", "bird", "fly")),
value=c(1,3,3,4,2))
p <- ggplot(df, aes(x = "", y = value, fill = variable)) +
geom_bar(width = 1, stat="identity") +
coord_polar("y", start=pi / 3) + ggtitle("Pie Chart")
print(p)
Wind Rose Pie Chart
# Polar bar chart with one x position per category
p <- ggplot(df, aes(x = variable, y = value, fill = variable)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = pi / 3) +
  ggtitle("Wind Rose Pie Chart") # Title matches this chart type rather than the plain pie chart
print(p)
Arranging Graphics on Page
Using grid
package
library(grid)
a <- ggplot(dsmall, aes(color, price/carat)) + geom_jitter(size=4, alpha = I(1 / 1.5), aes(color=color))
b <- ggplot(dsmall, aes(color, price/carat, color=color)) + geom_boxplot()
c <- ggplot(dsmall, aes(color, price/carat, fill=color)) + geom_boxplot() + theme(legend.position = "none")
grid.newpage() # Open a new page on grid device
pushViewport(viewport(layout = grid.layout(2, 2))) # Assign to device viewport with 2 by 2 grid layout
print(a, vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2))
print(b, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
print(c, vp = viewport(layout.pos.row = 2, layout.pos.col = 2, width=0.3, height=0.3, x=0.8, y=0.8))
Using gridExtra
package
library(gridExtra)
grid.arrange(a, b, c, nrow = 2, ncol=2)
Also see patchwork
in ggplot2 book here.
Inserting Graphics into Plots
library(grid)
print(a)
print(b, vp=viewport(width=0.3, height=0.3, x=0.8, y=0.8))