library(here) # manage file pathslibrary(socviz) # data and some useful things, especially %nin%library(tidyverse) # your friend and minelibrary(scales) # Convenient scale labels
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
library(tidycensus) # Tidily talk to the Censuslibrary(sf) # Draw maps with ggplot
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# A tibble: 8,959 × 3
name label concept
<chr> <chr> <chr>
1 H001001 Total HOUSING UNITS
2 H002001 Total URBAN AND RURAL
3 H002002 Total!!Urban URBAN AND RURAL
4 H002003 Total!!Urban!!Inside urbanized areas URBAN AND RURAL
5 H002004 Total!!Urban!!Inside urban clusters URBAN AND RURAL
6 H002005 Total!!Rural URBAN AND RURAL
7 H002006 Total!!Not defined for this file URBAN AND RURAL
8 H003001 Total OCCUPANCY STATUS
9 H003002 Total!!Occupied OCCUPANCY STATUS
10 H003003 Total!!Vacant OCCUPANCY STATUS
# ℹ 8,949 more rows
# Decennial Census variable codes to request
popvars <- c("P005003", "P005004", "P005006", "P004003")

# Request county-level 2010 decennial counts for North Carolina.
# P001001 is passed as the summary variable (the total population),
# and comes back in the `summary_value` column.
nc <- get_decennial(
  geography = "county",
  state = "NC",
  year = 2010,
  variables = popvars,
  summary_var = "P001001"
)

# Express each count as a percentage of its row's summary value
nc <- mutate(nc, pct = 100 * (value / summary_value))

nc
# A tibble: 400 × 6
GEOID NAME variable value summary_value pct
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 37007 Anson County, North Carolina P005003 12344 26948 45.8
2 37011 Avery County, North Carolina P005003 16029 17797 90.1
3 37003 Alexander County, North Carolina P005003 32671 37198 87.8
4 37015 Bertie County, North Carolina P005003 7393 21282 34.7
5 37013 Beaufort County, North Carolina P005003 31705 47759 66.4
6 37005 Alleghany County, North Carolina P005003 9862 11155 88.4
7 37001 Alamance County, North Carolina P005003 101718 151131 67.3
8 37009 Ashe County, North Carolina P005003 25420 27281 93.2
9 37017 Bladen County, North Carolina P005003 19242 35190 54.7
10 37019 Brunswick County, North Carolina P005003 86818 107431 80.8
# ℹ 390 more rows
# Request county-level 2010 decennial counts for North Carolina.
# P001001 is passed as the summary variable (the total population),
# and comes back in the `summary_value` column.
# NOTE(review): `popvars$variable` assumes `popvars` is a data frame /
# tibble with a `variable` column at this point. `$` does not work on a
# plain character vector, so confirm `popvars` is redefined before this
# chunk runs.
nc <- get_decennial(
  geography = "county",
  state = "NC",
  year = 2010,
  variables = popvars$variable,
  summary_var = "P001001"
)

# Express each count as a percentage of its row's summary value
nc <- mutate(nc, pct = 100 * (value / summary_value))

nc
# A tibble: 400 × 6
GEOID NAME variable value summary_value pct
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 37007 Anson County, North Carolina P005003 12344 26948 45.8
2 37011 Avery County, North Carolina P005003 16029 17797 90.1
3 37003 Alexander County, North Carolina P005003 32671 37198 87.8
4 37015 Bertie County, North Carolina P005003 7393 21282 34.7
5 37013 Beaufort County, North Carolina P005003 31705 47759 66.4
6 37005 Alleghany County, North Carolina P005003 9862 11155 88.4
7 37001 Alamance County, North Carolina P005003 101718 151131 67.3
8 37009 Ashe County, North Carolina P005003 25420 27281 93.2
9 37017 Bladen County, North Carolina P005003 19242 35190 54.7
10 37019 Brunswick County, North Carolina P005003 86818 107431 80.8
# ℹ 390 more rows
# Scatterplot of nh_white_pct (x) against nh_black_pct (y),
# one point per row of nc_wide.
ggplot(
  data = nc_wide,
  mapping = aes(x = nh_white_pct, y = nh_black_pct)
) +
  geom_point() +
  labs(
    title = "North Carolina Counties",
    x = "Percent White",
    y = "Percent Black"
  )
Population Pyramids
We can ask the Census for its estimates of the age breakdown of the population, using the get_estimates() function.
# National 2019 population estimates, broken down by sex and age group.
# breakdown_labels = TRUE asks for labelled SEX and AGEGROUP values.
usa <- get_estimates(
  geography = "us",
  year = 2019,
  product = "characteristics",
  breakdown = c("SEX", "AGEGROUP"),
  breakdown_labels = TRUE
)

usa
# A tibble: 96 × 5
GEOID NAME value SEX AGEGROUP
<chr> <chr> <dbl> <chr> <fct>
1 1 United States 328239523 Both sexes All ages
2 1 United States 19576683 Both sexes Age 0 to 4 years
3 1 United States 20195895 Both sexes Age 5 to 9 years
4 1 United States 161657324 Male All ages
5 1 United States 10009207 Male Age 0 to 4 years
6 1 United States 10322762 Male Age 5 to 9 years
7 1 United States 166582199 Female All ages
8 1 United States 9567476 Female Age 0 to 4 years
9 1 United States 7528626 Female Age 70 to 74 years
10 1 United States 20798268 Both sexes Age 10 to 14 years
# ℹ 86 more rows
Population Pyramids
# The %nin% ("not in") operator comes from socviz, so library(socviz)
# must already be loaded.
usa_pyr <- usa |>
  filter(
    # Drop the combined-sex rows ...
    SEX %nin% "Both sexes",
    # ... drop the all-ages rows ...
    AGEGROUP %nin% "All ages",
    # ... and keep only age groups whose label contains "Age"
    str_detect(AGEGROUP, "Age")
  ) |>
  # Flip the sign of the male counts so the two sexes have opposite signs
  mutate(value = ifelse(SEX == "Male", -value, value))

usa_pyr
# A tibble: 36 × 5
GEOID NAME value SEX AGEGROUP
<chr> <chr> <dbl> <chr> <fct>
1 1 United States -10009207 Male Age 0 to 4 years
2 1 United States -10322762 Male Age 5 to 9 years
3 1 United States 9567476 Female Age 0 to 4 years
4 1 United States 7528626 Female Age 70 to 74 years
5 1 United States 9873133 Female Age 5 to 9 years
6 1 United States -10618261 Male Age 10 to 14 years
7 1 United States 10180007 Female Age 10 to 14 years
8 1 United States -11064752 Male Age 20 to 24 years
9 1 United States 10568188 Female Age 20 to 24 years
10 1 United States -12004570 Male Age 25 to 29 years
# ℹ 26 more rows
Population Pyramids
# First pass at the pyramid: horizontal bars of value by age group,
# filled by sex.
ggplot(
  data = usa_pyr,
  mapping = aes(x = value, y = AGEGROUP, fill = SEX)
) +
  geom_col(width = 0.95, alpha = 0.75) +
  labs(
    title = "US Population Pyramid",
    x = "Count",
    y = NULL
  )
Population Pyramids
# Pyramid with tidied axis labels. Both scale functions take an anonymous
# function, written with the \(x) shorthand, to build their labels. This
# is slightly more advanced usage than normal.
ggplot(
  data = usa_pyr,
  mapping = aes(x = value, y = AGEGROUP, fill = SEX)
) +
  geom_col(width = 0.95, alpha = 0.75) +
  scale_x_continuous(
    # Label the absolute value, scaled by 0.001 with a "k" suffix
    labels = \(x) label_number(scale = 0.001, suffix = "k")(abs(x))
  ) +
  scale_y_discrete(
    # Strip "Age " and " years" from the age-group labels
    labels = \(x) str_remove_all(x, "Age | years")
  ) +
  labs(
    title = "US Population Pyramid",
    x = "Count",
    y = NULL
  ) +
  theme(legend.position = "bottom")