library(ggplot2)
or
library(tidyverse)
17-3-7
library(ggplot2)
or
library(tidyverse)
ggplot2 is built on a grammar of graphics (the "gg" in its name).
One reason ggplot2 is easier for beginners is that its default behavior is carefully chosen to satisfy the great majority of cases and is visually pleasing.
One limitation is that ggplot2 is designed to work exclusively with data tables in which rows are observations and columns are variables.
You should have the ggplot2 cheat sheet handy.
To find it, perform an internet search for "ggplot2 cheat sheet".
Data: The US murders data table is being summarized. We refer to this as the data component.
Geometry: The plot above is a scatterplot. This is referred to as the geometry component.
Aesthetic mapping: The plot uses several visual cues to represent the information provided by the dataset.
# Attach dslabs and load its murders data table, which the snippets below plot.
library(dslabs)
data(murders)
# ggplot objects
# Create a plot object bound to the murders table; no layers are added yet.
ggplot(data = murders)
# ggplot objects
# The pipe passes murders to ggplot() as its first argument (data).
murders %>% ggplot()
# ggplot objects
# Keep the plot object in p so later snippets can add layers to it,
# then inspect its class.
p <- ggplot(data = murders)
class(p)
## [1] "gg" "ggplot"
# Two ways to render the plot stored in p: an explicit print() call, or
# typing the object name and letting R auto-print it.
print(p)
p
DATA %>% ggplot() + LAYER 1 + LAYER 2 + … + LAYER N
Geometry layer functions are named geom_X, where X is the name of the geometry; examples include geom_point, geom_bar, and geom_histogram.
geom_point
> Aesthetics
>
> geom_point understands the following aesthetics (required aesthetics are in bold):
>
> - **x**
> - **y**
> - alpha
> - colour
# Scatterplot layer: population in millions on x, the total column on y.
murders %>%
  ggplot() +
  geom_point(aes(x = population / 10^6, y = total))
We can drop the x = and y = if we wanted to, since these are the first and second expected arguments, as seen in the help page.
p + geom_point(aes(population/10^6, total))
# Points plus a text layer that labels each point with the abb column.
p +
  geom_point(aes(population / 10^6, total)) +
  geom_text(aes(population / 10^6, total, label = abb))
This is fine:
# label = abb sits inside aes(), so abb is looked up as a column of the plot data.
p_test <- p + geom_text(aes(population/10^6, total, label = abb))
This is not:
# label = abb is outside aes(), so R evaluates abb as an ordinary variable
# rather than a data column -- presumably this fails because no object named
# abb exists in the workspace (kept as-is: it is the intended counter-example).
p_test <- p + geom_text(aes(population/10^6, total), label = abb)
# Larger points: size is a fixed setting, not a mapping, so it stays
# outside aes().
p +
  geom_point(aes(population / 10^6, total), size = 3) +
  geom_text(aes(population / 10^6, total, label = abb))

# Same plot with the labels shifted 1 unit along x so they clear the points.
p +
  geom_point(aes(population / 10^6, total), size = 3) +
  geom_text(aes(population / 10^6, total, label = abb), nudge_x = 1)
# Show ggplot()'s arguments; the recorded output below lists data first and
# mapping second, so a mapping can be declared once in the ggplot() call.
args(ggplot)
## function (data = NULL, mapping = aes(), ..., environment = parent.frame()) ## NULL
# Declare the mapping once in ggplot() so every layer added to p inherits it.
p <- murders %>%
  ggplot(aes(population / 10^6, total, label = abb))

# The layers now only carry their own fixed settings.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 1.5)
# A mapping given inside a layer overrides the global one for that layer only:
# here the text layer draws a fixed label at (10, 800).
p +
  geom_point(size = 3) +
  geom_text(aes(x = 10, y = 800, label = "Hello there!"))
# Put both axes on a log10 scale. The label nudge is reduced to 0.05,
# presumably because it is now applied in log units.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10")

# Same plot using the dedicated log10 scale shortcuts.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10()
# Add axis labels and a title on top of the log-scaled scatterplot.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Populations in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010")
# Redefine p as the whole plot except the points layer, so that different
# geom_point() variants can be added to it one at a time.
p <- murders %>%
  ggplot(aes(population / 10^6, total, label = abb)) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Populations in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010")
This won't work
# One fixed colour for every point: a plain argument outside aes().
p +
  geom_point(size = 3, color = "blue")

# Colour driven by the region column: a mapping, so it goes inside aes().
p +
  geom_point(aes(col = region), size = 3)
# Overall rate across all rows of murders: summed total divided by summed
# population, scaled to per-million, extracted as a plain number.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)
# Reference line for the overall rate r. Only the intercept is given, so
# geom_abline() falls back to its default slope; log10(r) is used because
# p has log10 axes.
p +
  geom_point(aes(col = region), size = 3) +
  geom_abline(intercept = log10(r))
# Add the dashed grey reference line before the points so the points are
# drawn on top of it.
p <- p +
  geom_abline(intercept = log10(r), lty = 2, color = "darkgrey") +
  geom_point(aes(col = region), size = 3)

# Give the colour legend the title "Region".
p <- p +
  scale_color_discrete(name = "Region")
# Restyle the finished plot with a ready-made theme from ggthemes.
library(ggthemes)
p + theme_economist()
# Alternative ggthemes theme applied to the same plot.
library(ggthemes)
p + theme_fivethirtyeight()
# Complete recipe for the final figure in one place.
library(ggthemes) # provides theme_economist()
library(ggrepel)  # provides geom_text_repel(); it is not part of ggplot2

# Overall rate per million people, used as the reference line.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)

murders %>%
  ggplot(aes(population / 10^6, total, label = abb)) +
  # Reference line first so the points and labels are drawn above it.
  geom_abline(intercept = log10(r), lty = 2, color = "darkgrey") +
  geom_point(aes(col = region), size = 3) +
  # Repelled labels in place of geom_text() with a manual nudge.
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Populations in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010") +
  scale_color_discrete(name = "Region") +
  theme_economist()
qplot
Make a quick scatterplot:
# qplot() draws a quick scatterplot straight from two vectors, without first
# building a data frame and an aes() mapping.
# NOTE(review): qplot() is reported as deprecated in recent ggplot2 releases --
# confirm against the installed version before relying on it.
data(murders)
x <- log10(murders$population)
y <- murders$total
qplot(x, y)
There are often reasons to graph plots next to each other. The gridExtra package permits us to do that:
library(gridExtra)

# Rate per 100,000 people for rows with population under 2 million.
p1 <- murders %>%
  mutate(rate = total / population * 10^5) %>%
  filter(population < 2 * 10^6) %>%
  ggplot(aes(population / 10^6, rate, label = abb)) +
  geom_text() +
  ggtitle("Small States")

# Same plot for rows with population over 10 million.
p2 <- murders %>%
  mutate(rate = total / population * 10^5) %>%
  filter(population > 10 * 10^6) %>%
  ggplot(aes(population / 10^6, rate, label = abb)) +
  geom_text() +
  ggtitle("Large States")

# Place the two plots next to each other in a two-column layout.
grid.arrange(p1, p2, ncol = 2)