── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(here)
here() starts at /Users/kjhealy/Documents/courses/vsd
Code
library(socviz)

## Mapping
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
Code
## Census
library(tidycensus)
options(tigris_use_cache = TRUE)

## Activate your API key by uncommenting the next line and
## putting your key in the quotes
# census_api_key("YOUR_API_KEY_HERE")

## Install this package
## remotes::install_github("kjhealy/nycomplaints")
library(nycomplaints)
Some helpful patterns for the final project
Code
## Count and Arrange
## One row per complaint type, most frequent first.
nycomplaints |>
  count(complaint_type) |>
  arrange(desc(n))
# A tibble: 57 × 2
complaint_type n
<chr> <int>
1 Housing and Buildings 59207
2 Transportation 33916
3 Finance 23395
4 Immigration 13486
5 Sanitation 12336
6 General Welfare 12322
7 Parks 9019
8 Public Safety 8935
9 Environment 7352
10 Utilities 7158
# ℹ 47 more rows
Code
## Group and count,
## top 3 complaints within each borough
nycomplaints |>
  group_by(borough, complaint_type) |>
  ## tally() leaves the result grouped by borough only, so the
  ## slice_max() below picks the top rows separately for each borough.
  tally() |>
  slice_max(order_by = n, n = 3)
# A tibble: 18 × 3
# Groups: borough [6]
borough complaint_type n
<chr> <chr> <int>
1 Bronx Housing and Buildings 5694
2 Bronx Finance 3140
3 Bronx Immigration 2334
4 Brooklyn Housing and Buildings 12228
5 Brooklyn Finance 8204
6 Brooklyn Transportation 7103
7 Manhattan Housing and Buildings 28766
8 Manhattan Transportation 5137
9 Manhattan General Welfare 3588
10 Queens Transportation 8201
11 Queens Housing and Buildings 7493
12 Queens Finance 3638
13 Staten Island Transportation 10754
14 Staten Island Sanitation 5229
15 Staten Island Finance 4611
16 <NA> Housing and Buildings 3097
17 <NA> Immigration 2139
18 <NA> Transportation 1839
Code
## Group and calculate proportions: top complaint in each boro
nycomplaints |>
  group_by(borough, complaint_type) |>
  tally() |>
  ## Still grouped by borough here, so sum(n) is the borough total and
  ## prop is each complaint type's share within its borough.
  mutate(prop = n / sum(n)) |>
  slice_max(order_by = prop, n = 1)
# A tibble: 6 × 4
# Groups: borough [6]
borough complaint_type n prop
<chr> <chr> <int> <dbl>
1 Bronx Housing and Buildings 5694 0.308
2 Brooklyn Housing and Buildings 12228 0.192
3 Manhattan Housing and Buildings 28766 0.472
4 Queens Transportation 8201 0.172
5 Staten Island Transportation 10754 0.326
6 <NA> Housing and Buildings 3097 0.192
Code
## Census variables: official names and short names
acs_vars <- tribble(
  ~varname,      ~clean_name,
  "B01003_001",  "pop",
  "B01001B_001", "black",
  "B01001A_001", "white",
  "B01001H_001", "nh_white",
  "B01001I_001", "hispanic",
  "B01001D_001", "asian",
  "B19013_001",  "median_hh_inc"
)

## NYC Counties in the Census/ACS = NYC Boroughs in the complaints data
ny_county_boros <- tribble(
  ~county,                     ~borough,
  "New York County, New York", "Manhattan",
  "Queens County, New York",   "Queens",
  "Kings County, New York",    "Brooklyn",
  "Bronx County, New York",    "Bronx",
  "Richmond County, New York", "Staten Island"
)

## Data for the 5 NYC Boroughs, with Borough names patched in
county_data <- get_acs(
  geography = "county",
  variables = acs_vars$varname,
  state = "NY",
  geometry = TRUE
) %>%
  rename(fips = GEOID, county = NAME) %>%
  select(-moe) %>%
  ## Reshape as a plain tibble (one row per county, one column per
  ## variable); the result is turned back into an sf object at the end.
  tibble::as_tibble() %>%
  pivot_wider(names_from = variable, values_from = estimate) %>%
  ## The lambda ignores the names it is given and returns clean_name as-is,
  ## so this rename is positional: varname and clean_name must stay
  ## row-aligned in acs_vars.
  rename_with(~ acs_vars$clean_name, all_of(acs_vars$varname)) %>%
  ## Keep only the five NYC counties, then attach their borough names.
  filter(county %in% ny_county_boros$county) %>%
  left_join(ny_county_boros, by = "county") %>%
  select(fips, county, borough, everything()) %>%
  sf::st_as_sf()
Getting data from the 2018-2022 5-year ACS
Code
## Print the assembled sf object to inspect it
county_data
Simple feature collection with 5 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -74.25563 ymin: 40.4961 xmax: -73.70036 ymax: 40.91771
Geodetic CRS: NAD83
# A tibble: 5 × 11
fips county borough geometry white black asian nh_white
<chr> <chr> <chr> <MULTIPOLYGON [°]> <dbl> <dbl> <dbl> <dbl>
1 36061 New Yor… Manhat… (((-74.00641 40.6887, -7… 8.43e5 230583 199086 749800
2 36047 Kings C… Brookl… (((-74.04171 40.62638, -… 1.08e6 803621 321110 966614
3 36081 Queens … Queens (((-73.96262 40.73903, -… 7.21e5 417637 614652 560919
4 36085 Richmon… Staten… (((-74.16154 40.64416, -… 3.26e5 48670 56516 285903
5 36005 Bronx C… Bronx (((-73.77242 40.85954, -… 2.59e5 491689 56630 123902
# ℹ 3 more variables: hispanic <dbl>, pop <dbl>, median_hh_inc <dbl>
Code
## Join tables:
## Merge the county_data into aggregated complaints data
## With this you can calculate e.g. complaints per capita within boros.
## This will work for zip codes too.
county_complaints <- nycomplaints |>
  group_by(borough, complaint_type) |>
  tally() |>
  mutate(prop = n / sum(n)) |>
  slice_max(order_by = prop, n = 1) |>
  ## Drops the row with a missing borough, which has no county to match.
  drop_na() |>
  ## The complaints table is on the left, so the result is a tibble
  ## (still grouped by borough) carrying a geometry column,
  ## not an sf object.
  left_join(county_data, by = "borough")

county_complaints
# A tibble: 5 × 14
# Groups: borough [5]
borough complaint_type n prop fips county geometry
<chr> <chr> <int> <dbl> <chr> <chr> <MULTIPOLYGON [°]>
1 Bronx Housing and B… 5694 0.308 36005 Bronx… (((-73.77242 40.85954, -…
2 Brooklyn Housing and B… 12228 0.192 36047 Kings… (((-74.04171 40.62638, -…
3 Manhattan Housing and B… 28766 0.472 36061 New Y… (((-74.00641 40.6887, -7…
4 Queens Transportation 8201 0.172 36081 Queen… (((-73.96262 40.73903, -…
5 Staten Isla… Transportation 10754 0.326 36085 Richm… (((-74.16154 40.64416, -…
# ℹ 7 more variables: white <dbl>, black <dbl>, asian <dbl>, nh_white <dbl>,
# hispanic <dbl>, pop <dbl>, median_hh_inc <dbl>
Source Code
---
title: "Final Project Examples"
---

## Setup

```{r}
library(tidyverse)
library(here)
library(socviz)

## Mapping
library(sf)

## Census
library(tidycensus)
options(tigris_use_cache = TRUE)

## Activate your API key by uncommenting the next line and
## putting your key in the quotes
# census_api_key("YOUR_API_KEY_HERE")

## Install this package
## remotes::install_github("kjhealy/nycomplaints")
library(nycomplaints)
```

## Some helpful patterns for the final project

```{r}
## Count and Arrange
## One row per complaint type, most frequent first.
nycomplaints |>
  count(complaint_type) |>
  arrange(desc(n))
```

```{r}
## Group and count,
## top 3 complaints within each borough
nycomplaints |>
  group_by(borough, complaint_type) |>
  ## tally() leaves the result grouped by borough only, so the
  ## slice_max() below picks the top rows separately for each borough.
  tally() |>
  slice_max(order_by = n, n = 3)
```

```{r}
## Group and calculate proportions: top complaint in each boro
nycomplaints |>
  group_by(borough, complaint_type) |>
  tally() |>
  ## Still grouped by borough here, so sum(n) is the borough total and
  ## prop is each complaint type's share within its borough.
  mutate(prop = n / sum(n)) |>
  slice_max(order_by = prop, n = 1)
```

```{r}
## Census variables: official names and short names
acs_vars <- tribble(
  ~varname,      ~clean_name,
  "B01003_001",  "pop",
  "B01001B_001", "black",
  "B01001A_001", "white",
  "B01001H_001", "nh_white",
  "B01001I_001", "hispanic",
  "B01001D_001", "asian",
  "B19013_001",  "median_hh_inc"
)

## NYC Counties in the Census/ACS = NYC Boroughs in the complaints data
ny_county_boros <- tribble(
  ~county,                     ~borough,
  "New York County, New York", "Manhattan",
  "Queens County, New York",   "Queens",
  "Kings County, New York",    "Brooklyn",
  "Bronx County, New York",    "Bronx",
  "Richmond County, New York", "Staten Island"
)

## Data for the 5 NYC Boroughs, with Borough names patched in
county_data <- get_acs(
  geography = "county",
  variables = acs_vars$varname,
  state = "NY",
  geometry = TRUE
) %>%
  rename(fips = GEOID, county = NAME) %>%
  select(-moe) %>%
  ## Reshape as a plain tibble (one row per county, one column per
  ## variable); the result is turned back into an sf object at the end.
  tibble::as_tibble() %>%
  pivot_wider(names_from = variable, values_from = estimate) %>%
  ## The lambda ignores the names it is given and returns clean_name as-is,
  ## so this rename is positional: varname and clean_name must stay
  ## row-aligned in acs_vars.
  rename_with(~ acs_vars$clean_name, all_of(acs_vars$varname)) %>%
  ## Keep only the five NYC counties, then attach their borough names.
  filter(county %in% ny_county_boros$county) %>%
  left_join(ny_county_boros, by = "county") %>%
  select(fips, county, borough, everything()) %>%
  sf::st_as_sf()

county_data
```

```{r}
## Join tables:
## Merge the county_data into aggregated complaints data
## With this you can calculate e.g. complaints per capita within boros.
## This will work for zip codes too.
county_complaints <- nycomplaints |>
  group_by(borough, complaint_type) |>
  tally() |>
  mutate(prop = n / sum(n)) |>
  slice_max(order_by = prop, n = 1) |>
  ## Drops the row with a missing borough, which has no county to match.
  drop_na() |>
  ## The complaints table is on the left, so the result is a tibble
  ## (still grouped by borough) carrying a geometry column,
  ## not an sf object.
  left_join(county_data, by = "borough")

county_complaints
```