Visualizing Social Data
Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode

Code

Text and Tables

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.8
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::edition_get()   masks testthat::edition_get()
## ✖ dplyr::filter()        masks stats::filter()
## ✖ purrr::is_null()       masks testthat::is_null()
## ✖ dplyr::lag()           masks stats::lag()
## ✖ readr::local_edition() masks testthat::local_edition()
## ✖ dplyr::matches()       masks tidyr::matches(), testthat::matches()
library(socviz)
## 
## Attaching package: 'socviz'

## The following object is masked from 'package:kjhutils':
## 
##     %nin%
organdata %>% select(1:6) %>% sample_n(size = 10)      
## # A tibble: 10 × 6
##    country       year       donors    pop pop_dens   gdp
##    <chr>         <date>      <dbl>  <int>    <dbl> <int>
##  1 Canada        1992-01-01   12.6  28377    0.285 19590
##  2 Switzerland   1993-01-01   16.6   6938   16.8   25316
##  3 Netherlands   2000-01-01   12.6  15926   38.3   26873
##  4 Norway        1995-01-01   15.7   4359    1.35  23868
##  5 Germany       1993-01-01   13.9  81156   22.7   19983
##  6 Netherlands   1995-01-01   15.2  15459   37.2   21723
##  7 Germany       2001-01-01   12.8  82350   23.1   25436
##  8 Australia     NA           NA       NA   NA        NA
##  9 Netherlands   1991-01-01   14.9  15070   36.3   18708
## 10 United States 2002-01-01   21.5 288369    2.99  36006
p <- ggplot(data = organdata,
            mapping = aes(x = year, y = donors))
p + geom_point()      
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = year, y = donors))
p + geom_line(aes(group = country)) + 
  facet_wrap(~ country)      
## Warning: Removed 34 row(s) containing missing values (geom_path).
p <- ggplot(data = organdata,
            mapping = aes(x = country, y = donors))
p + geom_boxplot()      
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
p <- ggplot(data = organdata,
            mapping = aes(x = donors, y = country))
p + geom_boxplot()       
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
p <- ggplot(data = organdata,
            mapping = aes(x = donors,
                          y = reorder(country, donors, na.rm=TRUE)))
p + geom_boxplot() +
    labs(y=NULL)      
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
p <- ggplot(data = organdata,
            mapping = aes(x = donors, fill = world,
                          y = reorder(country, donors, na.rm=TRUE)))
p + geom_boxplot() + labs(x=NULL) +
    theme(legend.position = "top")      
## Warning: Removed 34 rows containing non-finite values (stat_boxplot).
p <- ggplot(data = organdata,
            mapping = aes(x = donors,
                          y = reorder(country, donors, na.rm=TRUE), 
                          color = world))
p + geom_point() + labs(x=NULL) +
  theme(legend.position = "top")
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = donors,
                          y = reorder(country, donors, na.rm=TRUE), 
                          color = world))
p + geom_jitter() + labs(x=NULL) +
    theme(legend.position = "top")      
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = donors,
                          y = reorder(country, donors, na.rm=TRUE), 
                          color = world))
p + geom_jitter(position = position_jitter(width=0.15)) +
    labs(x=NULL) + theme(legend.position = "top")      
## Warning: Removed 34 rows containing missing values (geom_point).
by_country <- organdata %>% group_by(consent_law, country) %>%
    summarize(donors_mean= mean(donors, na.rm = TRUE),
              donors_sd = sd(donors, na.rm = TRUE),
              gdp_mean = mean(gdp, na.rm = TRUE),
              health_mean = mean(health, na.rm = TRUE),
              roads_mean = mean(roads, na.rm = TRUE),
              cerebvas_mean = mean(cerebvas, na.rm = TRUE))
## `summarise()` has grouped output by 'consent_law'. You can override using the
## `.groups` argument.
by_country
## # A tibble: 17 × 8
## # Groups:   consent_law [2]
##    consent_law country     donors_mean donors_sd gdp_mean health_mean roads_mean
##    <chr>       <chr>             <dbl>     <dbl>    <dbl>       <dbl>      <dbl>
##  1 Informed    Australia          10.6     1.14    22179.       1958.      105. 
##  2 Informed    Canada             14.0     0.751   23711.       2272.      109. 
##  3 Informed    Denmark            13.1     1.47    23722.       2054.      102. 
##  4 Informed    Germany            13.0     0.611   22163.       2349.      113. 
##  5 Informed    Ireland            19.8     2.48    20824.       1480.      118. 
##  6 Informed    Netherlands        13.7     1.55    23013.       1993.       76.1
##  7 Informed    United Kin…        13.5     0.775   21359.       1561.       67.9
##  8 Informed    United Sta…        20.0     1.33    29212.       3988.      155. 
##  9 Presumed    Austria            23.5     2.42    23876.       1875.      150. 
## 10 Presumed    Belgium            21.9     1.94    22500.       1958.      155. 
## 11 Presumed    Finland            18.4     1.53    21019.       1615.       93.6
## 12 Presumed    France             16.8     1.60    22603.       2160.      156. 
## 13 Presumed    Italy              11.1     4.28    21554.       1757       122. 
## 14 Presumed    Norway             15.4     1.11    26448.       2217.       70.0
## 15 Presumed    Spain              28.1     4.96    16933        1289.      161. 
## 16 Presumed    Sweden             13.1     1.75    22415.       1951.       72.3
## 17 Presumed    Switzerland        14.2     1.71    27233        2776.       96.4
## # … with 1 more variable: cerebvas_mean <dbl>
by_country <- organdata %>% 
  group_by(consent_law, country) %>%
    summarize(across(where(is.numeric),
                     list(mean = mean, 
                          sd = sd),
                      na.rm = TRUE,
                      .names = "{col}_{fn}"),
              .groups = "drop")
p <- ggplot(data = by_country,
            mapping = aes(x = donors_mean, 
                          y = reorder(country, donors_mean),
                          color = consent_law))
p + geom_point(size=3) +
    labs(x = "Donor Procurement Rate",
         y = NULL, color = "Consent Law") +
    theme(legend.position="top")      
p <- ggplot(data = by_country,
            mapping = aes(x = donors_mean,
                          y = reorder(country, donors_mean)))

p + geom_point(size=3) +
    facet_wrap(~ consent_law, scales = "free_y", ncol = 1) +
    labs(x= "Donor Procurement Rate",
         y= "")       
p <- ggplot(data = by_country, mapping = aes(x = reorder(country,
              donors_mean), y = donors_mean))

p + geom_pointrange(mapping = aes(ymin = donors_mean - donors_sd,
       ymax = donors_mean + donors_sd)) +
     labs(x= "", y= "Donor Procurement Rate") + coord_flip()      

Plot text directly

p <- ggplot(data = by_country,
            mapping = aes(x = roads_mean, y = donors_mean))
p + geom_point() + geom_text(mapping = aes(label = country))
p <- ggplot(data = by_country,
            mapping = aes(x = roads_mean, y = donors_mean))

p + geom_point() + geom_text(mapping = aes(label = country), hjust = 0)
library(ggrepel)      
elections_historic %>% select(2:7)       
## # A tibble: 49 × 6
##     year winner                 win_party ec_pct popular_pct popular_margin
##    <int> <chr>                  <chr>      <dbl>       <dbl>          <dbl>
##  1  1824 John Quincy Adams      D.-R.      0.322       0.309        -0.104 
##  2  1828 Andrew Jackson         Dem.       0.682       0.559         0.122 
##  3  1832 Andrew Jackson         Dem.       0.766       0.547         0.178 
##  4  1836 Martin Van Buren       Dem.       0.578       0.508         0.142 
##  5  1840 William Henry Harrison Whig       0.796       0.529         0.0605
##  6  1844 James Polk             Dem.       0.618       0.495         0.0145
##  7  1848 Zachary Taylor         Whig       0.562       0.473         0.0479
##  8  1852 Franklin Pierce        Dem.       0.858       0.508         0.0695
##  9  1856 James Buchanan         Dem.       0.588       0.453         0.122 
## 10  1860 Abraham Lincoln        Rep.       0.594       0.396         0.101 
## # … with 39 more rows
p_title <- "Presidential Elections: Popular & Electoral College Margins"
p_subtitle <- "1824-2016"
p_caption <- "Data for 2016 are provisional."
x_label <- "Winner's share of Popular Vote"
y_label <- "Winner's share of Electoral College Votes"

p <- ggplot(elections_historic, aes(x = popular_pct, y = ec_pct,
                                    label = winner_label))

p + geom_hline(yintercept = 0.5, size = 1.4, color = "gray80") +
    geom_vline(xintercept = 0.5, size = 1.4, color = "gray80") +
    geom_point() +
    geom_text_repel() +
    scale_x_continuous(labels = scales::percent) +
    scale_y_continuous(labels = scales::percent) +
    labs(x = x_label, y = y_label, title = p_title, subtitle = p_subtitle,
         caption = p_caption)      

Selective labels

p <- ggplot(data = by_country,
            mapping = aes(x = gdp_mean, y = health_mean))

p + geom_point() +
    geom_text_repel(data = subset(by_country, gdp_mean > 25000),
                    mapping = aes(label = country))
p <- ggplot(data = by_country,
            mapping = aes(x = gdp_mean, y = health_mean))

p + geom_point() +
    geom_text_repel(data = subset(by_country,
                                  gdp_mean > 25000 | health_mean < 1500 |
                                  country %in% "Belgium"),
                    mapping = aes(label = country))      
organdata <- organdata %>%
       mutate(ind = ccode %in% c("Ita", "Spa") &
                    organdata$year > 1998)

p <- ggplot(data = organdata,
            mapping = aes(x = roads,
                          y = donors, color = ind))
p + geom_point() +
    geom_text_repel(data = subset(organdata, ind),
                    mapping = aes(label = ccode)) +
    guides(label = "none", color = "none")      
## Warning: Removed 34 rows containing missing values (geom_point).

Arbitrary annotation

p <- ggplot(data = organdata, mapping = aes(x = roads, y = donors))
p + geom_point() + annotate(geom = "text", x = 91, y = 33,
                            label = "A surprisingly high \n recovery rate.",
                            hjust = 0)
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = roads, y = donors))
p + geom_point() +
    annotate(geom = "rect", xmin = 125, xmax = 155,
             ymin = 30, ymax = 35, fill = "red", alpha = 0.2) + 
    annotate(geom = "text", x = 157, y = 33,
             label = "A surprisingly high \n recovery rate.", hjust = 0)      
## Warning: Removed 34 rows containing missing values (geom_point).

Scales and Guides

p <- ggplot(data = organdata,
            mapping = aes(x = roads,
                          y = donors,
                          color = world))
p + geom_point()
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = roads,
                          y = donors,
                          color = world))
p + geom_point() +
    scale_x_log10() +
    scale_y_continuous(breaks = c(5, 15, 25),
                       labels = c("Five", "Fifteen", "Twenty Five"))
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = roads,
                          y = donors,
                          color = world))
p + geom_point() +
    scale_color_discrete(labels =
                             c("Corporatist", "Liberal",
                               "Social Democratic", "Unclassified")) +
    labs(x = "Road Deaths",
         y = "Donor Procurement",
        color = "Welfare State")
## Warning: Removed 34 rows containing missing values (geom_point).
p <- ggplot(data = organdata,
            mapping = aes(x = roads,
                          y = donors,
                          color = world))
p + geom_point() +
    labs(x = "Road Deaths",
         y = "Donor Procurement") +
    guides(color = "none")
## Warning: Removed 34 rows containing missing values (geom_point).