A common structure for a ggplot() command with a fair
amount of customization might be:
scale_axisname_type
- scale_x_continuous for a continuous x-axis
- scale_x_discrete for a discrete one
- scale_y_continuous, scale_color_continuous, scale_color_gradient,
  scale_fill_discrete, and so on and so on
limits)
data <- tibble(category = c('Apple','Banana','Carrot','Apple','Banana','Carrot'),
person = c('Me','Me','Me','You','You','You'),
quality = c(.06,.04,.03,.01,.06,.03))
ggplot(data, aes(x = person, y = quality, fill = category)) + geom_col(position = 'dodge')
library(scales)
ggplot(data, aes(x = person, y = quality, fill = category)) + geom_col(position = 'dodge') +
scale_y_continuous(labels = label_percent(), limits = c(0,.1)) +
scale_x_discrete(position = 'top') +
scale_fill_manual(values = c('Apple'='red','Banana'='yellow','Carrot'='orange'))
labels is handy. labels = c('Red Apple',
'Yellow Banana','Orange Carrot') would relabel the legend
(or axis labels)
scale_x_continuous(labels = scales::label_dollar()) would
put it in dollar terms. More on this in a moment
scale_x_date(date_labels = '%m/%Y')
scale_color_/scale_fill_ functions that solely
exist to help with this!
Especially useful are:
scale_color_gradient() for gradient scales (or
_gradient2() for diverging scales with a “middle” in them),
scale_color_viridis() also has some great gradient scales
(either discrete or continuous!)
scale_color_brewer()/scale_fill_brewer()
functions for discrete values, or _distiller() for
continuous values, or _fermenter() for binned values
ggplot(data, aes(x = person, y = quality, fill = category)) + geom_col(position = 'dodge') +
scale_y_continuous(labels = label_percent(), limits = c(0,.1)) +
scale_x_discrete(position = 'top') +
scale_fill_brewer(palette = 'Dark2')
ggplot(data, aes(x = person, y = quality, group = category, fill = quality)) + geom_col(position = 'dodge') +
scale_y_continuous(labels = label_percent(), limits = c(0,.1)) +
scale_x_discrete(position = 'top') +
scale_fill_viridis_c()
ggplot(data, aes(x = person, y = quality, group = category, fill = quality)) + geom_col(position = 'dodge') +
scale_y_continuous(labels = label_percent(), limits = c(0,.1)) +
scale_x_discrete(position = 'top') +
scale_fill_gradient2(midpoint = .03)
scale_something_continuous entries have a
trans option, set to date, log,
probability, reciprocal, sqrt,
reverse, etc. etc. to perform that transformation before
plotting
scale_ functions. TBH
the only transformations I see frequently are
scale_something_log10() or
scale_something_binned()
ggplot(mtcars, aes(x = mpg, y = hp, color = wt)) +
geom_point() +
scale_x_log10() +
scale_y_continuous(trans='reverse') +
scale_color_binned()
When to use log scales?
Two main types of functions in scales:
dollar():
dollar(10) creates $10 (NOTE: handy sometimes in RMarkdown
text! Also note this creates text, not numbers, so don’t use them in
aes() unless you want the variable to be a string)
label_dollar() designed to slot
directly into the labels= argument.
scale_y_continuous(labels = label_dollar()) turns all your
y-axis labels into the dollar equivalent
ggplot(data, aes(x = person, y = quality, fill = category)) + geom_col(position = 'dodge') +
scale_y_continuous(labels = label_percent(), limits = c(0,.1))
The label_ functions have lots of options! You can set
the accuracy (precision), decide how to break up big
numbers (big.mark) or scale things down to, say, thousands!
(scale=1/1000, suffix = 'k')
data(gapminder, package = 'gapminder')
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) + geom_point() +
scale_x_log10(labels = label_dollar(accuracy = 1, scale = 1/1000, suffix = 'k'))
help(whatever) before using
something
data(mtcars)
mtcars <- mtcars %>% mutate(CarName = row.names(mtcars))
ggplot(mtcars, aes(x = mpg, y = hp, color = wt)) + geom_point() +
scale_x_log10() + scale_y_reverse() + scale_color_binned() +
labs(x = 'Miles per Gallon', y = 'Horsepower', color = 'Car Weight',
title = 'Title', subtitle = 'Subtitle', caption = 'Caption')
geom_smooth best fit
line over top, for example)
ggplot(mtcars, aes(x = mpg, y = hp, color = wt)) + geom_point() +
scale_x_log10() + scale_y_reverse() + scale_color_binned() +
labs(x = 'Miles per Gallon', y = 'Horsepower', color = 'Car Weight') +
geom_smooth(method='lm', se = FALSE)
ggplot(mtcars, aes(x = mpg, y = hp, color = wt)) + geom_point() +
scale_x_log10() + scale_y_reverse() + scale_color_binned() +
labs(x = 'Miles per Gallon', y = 'Horsepower', color = 'Car Weight') +
geom_text_repel(data = mtcars %>% slice(1:5),aes(label = CarName),hjust=-1)
ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point() +
facet_wrap('cyl') +
labs(x = 'Miles per Gallon', y = 'Horsepower', title = 'Horsepower vs. MPG by Cylinders')
library(ggforce)
ggplot(iris, aes(Petal.Length, Petal.Width, colour = Species)) +
geom_point() +
facet_zoom(x = Species == 'versicolor')
Inside aes(), it becomes an axis and must be mapped to a variable
name
Outside aes() (and in the geometry),
it’s a setting applied to the entire geometry
# We've seen this before
# Turn am into a factor labelled Automatic/Manual for use as a plot aesthetic
mtcars <- mutate(
  mtcars,
  Transmission = factor(am, labels = c('Automatic', 'Manual'))
)
ggplot(mtcars, aes(x = mpg, y = hp, color = Transmission)) +
geom_point()
ggplot(mtcars, aes(x = mpg, y = hp, color = Transmission,
size = wt, shape = Transmission)) +
geom_point()
ggplot(mtcars, aes(x = Transmission, fill = factor(cyl))) +
geom_bar(position = 'dodge', linetype = 'dashed', color = 'black')
theme()
help(theme)
element_
functions like element_text(), element_line(),
element_rect() which take aesthetic settings like
size, color, etc.
theme_classic() or
theme_minimal() or theme_void()
axis.line and
element_line
axis.line or
even line) or specifically (axis.ticks.x)
library(ggalt)
# One row per transmission type with the number of cars in that group
mtcars2 <- mtcars %>%
  group_by(Transmission) %>%
  summarize(count = n())
ggplot(mtcars2, aes(x = Transmission,y=count)) +
geom_lollipop(color = 'red', size = 2) + coord_flip() +
labs(x = '', y = '') +
scale_y_continuous(breaks = c(10,20), limits = c(0,20)) +
theme(axis.line = element_line(color = 'red'),
axis.ticks.x = element_line(size = 5))
panel to change what goes behind that
geometry! element_rect might come up!
element_blank()
library(gghighlight); data(gapminder, package = 'gapminder')
ggplot(gapminder, aes(x = year, y = lifeExp, color=country)) + geom_line(size = 1.5) +
labs(x = NULL, y = "Life Expectancy", title = "North America Only") +
scale_x_continuous(limits=c(1950,2015),
breaks = c(1950,1970,1990,2010))+
gghighlight(country %in% c('United States','Canada','Mexico'),
unhighlighted_params = aes(size=.1),
label_params=list(direction='y',nudge_x=10)) +
theme_minimal(base_family='serif')
scale_X_manual()
gapminder %>% mutate(color_name = ifelse(country %in% c('United States','Canada','Mexico'), as.character(country), 'Other')) %>%
ggplot(aes(x = year, y = lifeExp, group = country, color=color_name, size = color_name, alpha = color_name)) + geom_line() +
geom_text(aes(label = ifelse(year == 2007 & color_name != 'Other', color_name,'')),
hjust = 0, size = 13/.pt) +
labs(x = NULL, y = "Life Expectancy", title = "North America Only") +
scale_x_continuous(limits=c(1950,2025),
breaks = c(1950,1970,1990,2010))+
scale_color_manual(values = c('red','forestgreen','gray','blue')) +
scale_size_manual(values = c(1.5,1.5,.1,1.5)) +
scale_alpha_manual(values = c(1,1,.2,1)) +
theme_minimal(base_family='serif') +
guides(color = 'none', size = 'none', alpha = 'none')
geom_text (see
previous slide)