Final Project Examples

Setup

Code
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(here)
here() starts at /Users/kjhealy/Documents/courses/vsd
Code
library(socviz)

## Mapping
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
Code
## Census
library(tidycensus)
options(tigris_use_cache = TRUE)

## Activate your API key by uncommenting the next line and 
## putting your key in the quotes
# census_api_key("YOUR_API_KEY_HERE")

## Install this package
## remotes::install_github("kjhealy/nycomplaints")
library(nycomplaints)

Some helpful patterns for the final project

Code
## Tally complaints by type, most frequent first
nycomplaints |> 
  count(complaint_type, sort = TRUE)
# A tibble: 57 × 2
   complaint_type            n
   <chr>                 <int>
 1 Housing and Buildings 59207
 2 Transportation        33916
 3 Finance               23395
 4 Immigration           13486
 5 Sanitation            12336
 6 General Welfare       12322
 7 Parks                  9019
 8 Public Safety          8935
 9 Environment            7352
10 Utilities              7158
# ℹ 47 more rows
Code
## Top 3 complaint types within each borough:
## count every borough/type pair, then rank inside each borough
nycomplaints |> 
  count(borough, complaint_type) |> 
  group_by(borough) |> 
  slice_max(n, n = 3)
# A tibble: 18 × 3
# Groups:   borough [6]
   borough       complaint_type            n
   <chr>         <chr>                 <int>
 1 Bronx         Housing and Buildings  5694
 2 Bronx         Finance                3140
 3 Bronx         Immigration            2334
 4 Brooklyn      Housing and Buildings 12228
 5 Brooklyn      Finance                8204
 6 Brooklyn      Transportation         7103
 7 Manhattan     Housing and Buildings 28766
 8 Manhattan     Transportation         5137
 9 Manhattan     General Welfare        3588
10 Queens        Transportation         8201
11 Queens        Housing and Buildings  7493
12 Queens        Finance                3638
13 Staten Island Transportation        10754
14 Staten Island Sanitation             5229
15 Staten Island Finance                4611
16 <NA>          Housing and Buildings  3097
17 <NA>          Immigration            2139
18 <NA>          Transportation         1839
Code
## Share of each complaint type within its borough,
## keeping only the single largest share per boro
nycomplaints |> 
  count(borough, complaint_type) |> 
  group_by(borough) |> 
  mutate(prop = n / sum(n)) |> 
  slice_max(prop, n = 1)
# A tibble: 6 × 4
# Groups:   borough [6]
  borough       complaint_type            n  prop
  <chr>         <chr>                 <int> <dbl>
1 Bronx         Housing and Buildings  5694 0.308
2 Brooklyn      Housing and Buildings 12228 0.192
3 Manhattan     Housing and Buildings 28766 0.472
4 Queens        Transportation         8201 0.172
5 Staten Island Transportation        10754 0.326
6 <NA>          Housing and Buildings  3097 0.192
Code
## Lookup table pairing each official ACS variable code (varname)
## with the short column name we want to use for it (clean_name).
## The two vectors are aligned by position.
acs_vars <- tibble(
  varname = c("B01003_001", "B01001B_001", "B01001A_001", "B01001H_001",
              "B01001I_001", "B01001D_001", "B19013_001"),
  clean_name = c("pop", "black", "white", "nh_white",
                 "hispanic", "asian", "median_hh_inc")
)

## Crosswalk: county names as they appear in the Census/ACS data,
## matched to the borough names used in the complaints data
ny_county_boros <- tibble(
  county = c("New York County, New York",
             "Queens County, New York",
             "Kings County, New York",
             "Bronx County, New York",
             "Richmond County, New York"),
  borough = c("Manhattan",
              "Queens",
              "Brooklyn",
              "Bronx",
              "Staten Island")
)

## Data for the 5 NYC Boroughs, with Borough names patched in.
##
## Steps: fetch county-level ACS estimates (with geometry) for New York
## State, reshape to one row per county with one column per variable,
## give the variables their short names, keep only the five NYC counties,
## attach the borough names, and turn the result back into an sf object.
county_data <- get_acs(geography = "county", 
        variables = acs_vars$varname, 
        state = "NY", 
        ## Pin the survey vintage so the numbers do not change when
        ## the tidycensus default year moves on
        year = 2022, 
        survey = "acs5", 
        geometry = TRUE) |> 
  rename(fips = GEOID, 
         county = NAME) |> 
  ## Drop the margin of error so that pivoting gives one row per county
  select(-moe) |> 
  ## Work on a plain tibble while reshaping; sf class is restored below
  tibble::as_tibble() |> 
  pivot_wider(names_from = variable, 
              values_from = estimate) |> 
  ## Named lookup (new name = old name): each ACS code is matched to
  ## its own short name explicitly rather than by position
  rename(all_of(setNames(acs_vars$varname, 
                         acs_vars$clean_name))) |> 
  filter(county %in% ny_county_boros$county) |> 
  left_join(ny_county_boros, by = "county") |> 
  ## Put the identifying columns first
  relocate(fips, county, borough) |> 
  sf::st_as_sf()
Getting data from the 2018-2022 5-year ACS
Code
## Print the result to check it: one row per borough
county_data
Simple feature collection with 5 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25563 ymin: 40.4961 xmax: -73.70036 ymax: 40.91771
Geodetic CRS:  NAD83
# A tibble: 5 × 11
  fips  county   borough                  geometry  white  black  asian nh_white
  <chr> <chr>    <chr>          <MULTIPOLYGON [°]>  <dbl>  <dbl>  <dbl>    <dbl>
1 36061 New Yor… Manhat… (((-74.00641 40.6887, -7… 8.43e5 230583 199086   749800
2 36047 Kings C… Brookl… (((-74.04171 40.62638, -… 1.08e6 803621 321110   966614
3 36081 Queens … Queens  (((-73.96262 40.73903, -… 7.21e5 417637 614652   560919
4 36085 Richmon… Staten… (((-74.16154 40.64416, -… 3.26e5  48670  56516   285903
5 36005 Bronx C… Bronx   (((-73.77242 40.85954, -… 2.59e5 491689  56630   123902
# ℹ 3 more variables: hispanic <dbl>, pop <dbl>, median_hh_inc <dbl>
Code
## Join tables:
## Find the top complaint type (by within-borough share) for each borough,
## drop rows with missing values, then attach the borough-level
## Census columns from county_data, matching on borough name.
## Once joined you can calculate e.g. complaints per capita within boros.
## The same pattern will work for zip codes too.
county_complaints <- nycomplaints |>
  count(borough, complaint_type) |> 
  group_by(borough) |> 
  mutate(prop = n / sum(n)) |> 
  slice_max(prop, n = 1) |> 
  drop_na() |> 
  left_join(county_data, by = "borough")

county_complaints
# A tibble: 5 × 14
# Groups:   borough [5]
  borough      complaint_type     n  prop fips  county                  geometry
  <chr>        <chr>          <int> <dbl> <chr> <chr>         <MULTIPOLYGON [°]>
1 Bronx        Housing and B…  5694 0.308 36005 Bronx… (((-73.77242 40.85954, -…
2 Brooklyn     Housing and B… 12228 0.192 36047 Kings… (((-74.04171 40.62638, -…
3 Manhattan    Housing and B… 28766 0.472 36061 New Y… (((-74.00641 40.6887, -7…
4 Queens       Transportation  8201 0.172 36081 Queen… (((-73.96262 40.73903, -…
5 Staten Isla… Transportation 10754 0.326 36085 Richm… (((-74.16154 40.64416, -…
# ℹ 7 more variables: white <dbl>, black <dbl>, asian <dbl>, nh_white <dbl>,
#   hispanic <dbl>, pop <dbl>, median_hh_inc <dbl>