Scatter Plot
This document focuses simply on creating a production-ready scatter plot, without worrying about its interpretation. The source code can be obtained here.
library(tidyr)
library(dplyr)
library(readr)
library(scales)
library(ggplot2)
# Fetch the NYT primary-vote data once and cache it in the working directory.
# The author's project directory is only entered when it actually exists, so
# the script no longer aborts on machines where that absolute path is missing;
# in that case the current working directory is used instead.
project_dir <- "/Users/ethen/Business-Analytics/articles/nyt_scatter"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

file <- "nytimes_vote.tsv"
if (!file.exists(file)) {
  url <- "https://static01.nyt.com/newsgraphics/2016/04/21/undervote/ad8bd3e44231c1091e75621b9f27fe31d116999f/data.tsv"

  # A failed or interrupted download can leave a partial file behind. Remove
  # it so the file.exists() guard above does not skip the download next run.
  status <- tryCatch(
    download.file(url, file),
    error = function(e) {
      unlink(file)
      stop("Could not download ", url, ": ", conditionMessage(e),
           call. = FALSE)
    }
  )

  # download.file() returns 0 on success and a non-zero code on failure.
  if (status != 0) {
    unlink(file)
    stop("Could not download ", url, " (status ", status, ")", call. = FALSE)
  }
}
# Load the raw table and reshape it from wide to long in a single pipeline:
#   1. give the three vote-share columns human-readable candidate names
#   2. drop the total-votes column, which the plot does not use
#   3. stack the candidate columns into `party` (name) / `pct` (share),
#      keeping `ratio` and `fips` as identifier columns
#   4. sort by county code and ratio
#   5. turn `party` into a factor with a fixed level order
# NOTE(review): "Hilary" is misspelled (should be "Hillary"), but the same
# labels are used as the names of `fill_color` in the plotting code, so both
# places have to be changed together.
df <- read_tsv(file) %>%
  rename(
    "Someone else" = undervt,
    "Hilary Clinton" = clintonpct,
    "Bernie Sanders" = sanderspct
  ) %>%
  select(-tvotes) %>%
  gather(party, pct, -ratio, -fips) %>%
  arrange(fips, ratio) %>%
  mutate(
    party = factor(
      party,
      levels = c("Hilary Clinton", "Bernie Sanders", "Someone else")
    )
  )
df
## # A tibble: 1,461 × 4
## fips ratio party pct
## <int> <dbl> <fctr> <dbl>
## 1 4001 0.8504684 Hilary Clinton 0.66388184
## 2 4001 0.8504684 Bernie Sanders 0.28837834
## 3 4001 0.8504684 Someone else 0.04773982
## 4 4003 0.7078563 Hilary Clinton 0.56740814
## 5 4003 0.7078563 Bernie Sanders 0.39412560
## 6 4003 0.7078563 Someone else 0.03846626
## 7 4005 0.6586770 Hilary Clinton 0.44145253
## 8 4005 0.6586770 Bernie Sanders 0.53400523
## 9 4005 0.6586770 Someone else 0.02454224
## 10 4007 0.9009450 Hilary Clinton 0.59508695
## # ... with 1,451 more rows
# Scatter plot of vote share (`pct`) against `ratio`, one dot per row of `df`,
# filled by candidate.
# Styling notes:
# 1. point shape = 21 is a circle whose border (`color`) and interior (`fill`)
#    are set separately, which gives every dot a white outline
# 2. scale_fill_manual( name = "" ) gets rid of the legend title
# 3. theme's legend.key controls the boxes around the legend's shapes
# The names of `fill_color` must match the factor levels of `df$party` exactly.
fill_color <- c(
  "Hilary Clinton" = "#5fa0d6",
  "Bernie Sanders" = "#83BC57",
  "Someone else" = "#d65454"
)

ggplot(df, aes(x = ratio, y = pct)) +
  geom_point(
    aes(fill = party),
    size = 3, alpha = 0.8, color = "white", shape = 21
  ) +
  scale_fill_manual(name = "", values = fill_color) +
  theme_bw(base_family = "Arial Narrow") +
  # `labels` is spelled out in full instead of relying on partial argument
  # matching of `label`.
  scale_y_continuous(labels = percent, limits = c(0, 1.05)) +
  scale_x_continuous(limits = c(0, 4.5), breaks = seq(0, 4.5, 0.5)) +
  # In-panel caption standing in for the y axis title (labs(y = NULL) below).
  # annotate() places a single fixed label without needing a one-row data
  # frame; vjust = -1 lifts the text above y = 1.
  annotate(
    "text",
    x = 0, y = 1, label = "↑ Share of 2016 primary vote",
    vjust = -1, hjust = 0, size = 3,
    fontface = "bold", family = "Arial Narrow"
  ) +
  labs(
    x = "Ratio of registered Democrats to Obama voters →",
    y = NULL,
    title = "The Kinds of Places Sanders Beats Clinton",
    subtitle = "Each dot on this chart represents the share of a county's vote for a candidate in the 2016 Democratic primary"
  ) +
  theme(
    legend.key = element_blank(),
    legend.position = "top",
    plot.title = element_text(face = "bold"),
    axis.ticks = element_blank(),
    axis.text = element_text(size = 8),
    # x axis title is right-aligned so the arrow ends at the plot edge
    axis.title.x = element_text(hjust = 1, face = "bold", size = 9),
    panel.grid.minor = element_blank(),
    # `size` is the line-width argument in the ggplot2 2.2.0 used here;
    # ggplot2 >= 3.4.0 prefers `linewidth` for line elements.
    panel.grid.major = element_line(linetype = "dotted", size = 0.5),
    panel.border = element_blank(),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )
R Session Information
# Print the R version, platform settings and package versions of this session
# so the results above can be reproduced.
devtools::session_info()
## Session info --------------------------------------------------------------
## setting value
## version R version 3.2.4 (2016-03-10)
## system x86_64, darwin13.4.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Chicago
## date 2016-12-28
## Packages ------------------------------------------------------------------
## package * version date source
## assertthat 0.1 2013-12-06 CRAN (R 3.2.0)
## bookdown 0.1 2016-07-13 CRAN (R 3.2.5)
## colorspace 1.2-6 2015-03-11 CRAN (R 3.2.0)
## DBI 0.4-1 2016-05-08 CRAN (R 3.2.5)
## devtools 1.12.0 2016-06-24 CRAN (R 3.2.5)
## digest 0.6.9 2016-01-08 CRAN (R 3.2.3)
## dplyr * 0.5.0 2016-06-24 CRAN (R 3.2.5)
## evaluate 0.9 2016-04-29 cran (@0.9)
## formatR 1.4 2016-05-09 cran (@1.4)
## ggplot2 * 2.2.0 2016-11-11 CRAN (R 3.2.5)
## gtable 0.2.0 2016-02-26 CRAN (R 3.2.3)
## highr 0.6 2016-05-09 cran (@0.6)
## htmltools 0.3.5 2016-03-21 CRAN (R 3.2.4)
## httpuv 1.3.3 2015-08-04 CRAN (R 3.2.0)
## knitr 1.14 2016-08-13 CRAN (R 3.2.4)
## labeling 0.3 2014-08-23 CRAN (R 3.2.0)
## lazyeval 0.2.0 2016-06-12 CRAN (R 3.2.5)
## magrittr 1.5 2014-11-22 CRAN (R 3.2.0)
## memoise 1.0.0 2016-01-29 CRAN (R 3.2.3)
## mime 0.4 2015-09-03 CRAN (R 3.2.0)
## miniUI 0.1.1 2016-01-15 CRAN (R 3.2.3)
## munsell 0.4.3 2016-02-13 CRAN (R 3.2.3)
## plyr 1.8.4 2016-06-08 cran (@1.8.4)
## questionr 0.5 2016-03-15 CRAN (R 3.2.4)
## R6 2.1.2 2016-01-26 CRAN (R 3.2.3)
## Rcpp 0.12.5 2016-05-14 cran (@0.12.5)
## readr * 0.2.2 2015-10-22 CRAN (R 3.2.0)
## rmarkdown 1.1 2016-10-16 CRAN (R 3.2.4)
## rmdformats 0.3 2016-09-05 CRAN (R 3.2.5)
## rstudioapi 0.6 2016-06-27 CRAN (R 3.2.5)
## scales * 0.4.1 2016-11-09 CRAN (R 3.2.5)
## shiny 0.13.2 2016-03-28 CRAN (R 3.2.4)
## stringi 1.0-1 2015-10-22 CRAN (R 3.2.0)
## stringr 1.0.0 2015-04-30 CRAN (R 3.2.0)
## tibble 1.2 2016-08-26 CRAN (R 3.2.5)
## tidyr * 0.5.1 2016-06-14 CRAN (R 3.2.5)
## withr 1.0.1 2016-02-04 CRAN (R 3.2.3)
## xtable 1.8-2 2016-02-05 CRAN (R 3.2.3)
## yaml 2.1.13 2014-06-12 CRAN (R 3.2.0)