Scatter Plot

Source code can be obtained here


# Load the primary-vote data from a local TSV, downloading it first when no
# local copy exists, then reshape it to long format for plotting.
# NOTE(review): assumes readr, dplyr and tidyr are attached earlier in the
# document (read_tsv, rename/select/arrange/mutate, %>%, gather) -- confirm.
file <- "nytimes_vote.tsv"
if (!file.exists(file)) {
    # NOTE(review): the download URL is blank in this copy of the script;
    # fill it in before relying on the automatic download.
    url <- ""
    if (!nzchar(url)) {
        stop("No download URL is set for '", file, "'; set `url` or place ",
             "the file in the working directory.", call. = FALSE)
    }
    download.file(url, file)
}

# convert to more informative column names (these become the legend labels)
df <- read_tsv(file)
df <- df %>% rename("Someone else" = undervt,
                    "Hilary Clinton" = clintonpct,
                    "Bernie Sanders" = sanderspct)

# drop the total-votes column and stack the three share columns into
# party / pct pairs, keeping ratio and fips as identifier columns
df <- gather(select(df, -tvotes), party, pct, -ratio, -fips)
df <- arrange(df, fips, ratio)

# explicit factor levels fix the order in which the candidates are listed
df <- df %>% mutate(
    party = factor(party, levels = c("Hilary Clinton", "Bernie Sanders", "Someone else"))
)
## # A tibble: 1,461 × 4
##     fips     ratio          party        pct
##    <int>     <dbl>         <fctr>      <dbl>
## 1   4001 0.8504684 Hilary Clinton 0.66388184
## 2   4001 0.8504684 Bernie Sanders 0.28837834
## 3   4001 0.8504684   Someone else 0.04773982
## 4   4003 0.7078563 Hilary Clinton 0.56740814
## 5   4003 0.7078563 Bernie Sanders 0.39412560
## 6   4003 0.7078563   Someone else 0.03846626
## 7   4005 0.6586770 Hilary Clinton 0.44145253
## 8   4005 0.6586770 Bernie Sanders 0.53400523
## 9   4005 0.6586770   Someone else 0.02454224
## 10  4007 0.9009450 Hilary Clinton 0.59508695
## # ... with 1,451 more rows
# Scatter plot of each candidate's vote share (pct) against ratio.
# NOTE(review): assumes ggplot2 and scales (for `percent`) are attached
# earlier in the document, and that `df` has columns ratio, pct and party
# -- confirm.
# 1. point shape = 21 is a circle with separate fill and outline colours
# 2. scale_fill_manual( name = "" ) gets rid of the legend title
# 3. theme's legend.key controls the boxes around the legend's shape
fill_color <- c(
    "Hilary Clinton" = "#5fa0d6",
    "Bernie Sanders" = "#83BC57",
    "Someone else" = "#d65454"
)

ggplot( df, aes(x = ratio, y = pct) ) +
geom_point( aes(fill = party), size = 3, alpha = 0.8, color = "white", shape = 21 ) +
scale_fill_manual(name = "", values = fill_color) +
theme_bw( base_family = "Arial Narrow" ) +
# `labels` is spelled out in full rather than relying on partial matching
scale_y_continuous( labels = percent, limits = c(0, 1.05) ) +
scale_x_continuous( limits = c(0, 4.5), breaks = seq(0, 4.5, 0.5) ) +
# the y-axis title is drawn as an in-panel text label at the top-left corner,
# using a one-row data frame so the label is drawn only once
geom_text( data = data.frame(label = "↑ Share of 2016 primary vote"),
           aes(x = 0, y = 1, label = label), vjust = -1, hjust = 0, size = 3,
           fontface = "bold", family = "Arial Narrow" ) +
labs( x = "Ratio of registered Democrats to Obama voters →",
      y = NULL, title = "The Kinds of Places Sanders Beats Clinton",
      subtitle = "Each dot on this chart represents the share of a county's vote for a candidate in the 2016 Democratic primary" ) +
theme( legend.key = element_blank(),
       legend.position = "top",
       plot.title = element_text(face = "bold"),
       axis.ticks = element_blank(),
       axis.text = element_text(size = 8),
       axis.title.x = element_text(hjust = 1, face = "bold", size = 9),
       panel.grid.minor = element_blank(),
       panel.grid.major = element_line(linetype = "dotted", size = 0.5),
       panel.border = element_blank(),
       plot.margin = margin(t = 10, r = 10, b = 10, l = 10) )

Ethen Liu
