First we will load some census data into two tables. The first contains population by county, which we will use later on in our county-level analysis. The second functions as a lookup for county and state names by fips code.

TODO: more recent census data?

census <- read_csv("../data/raw/co-est2018-alldata.csv") %>% 
  mutate(fips=paste0(STATE, COUNTY)) %>%
  dplyr::select(fips, POPESTIMATE2018)

countynames <- read_csv("../data/raw/co-est2018-alldata.csv") %>% 
  mutate(fips=paste0(STATE, COUNTY)) %>% 
  dplyr::select(fips, CTYNAME, STNAME)

Next we define two formatting functions that will make our output more human-readable.

fmt_c <- function(degrees_celsius) {
  paste0(format(degrees_celsius, digits=3), "ºC")
}

fmt_f <- function(degrees_fahrenheit) {
  paste0(format(degrees_fahrenheit, digits=3), "ºF")
}

Here’s the heart of our analysis. The function modelAnnualTempChg uses linear regression to model the relationship between the year and temperature. The slope of this model is an estimate of the rate at which the temperature is changing, which we then multiply by 124 to extrapolate the rate to an estimate of temperature change over the entire period from 1895 to 2019.

# this part is split out because we will reuse it later on.
extractModelVariables <-
  . %>% 
  mutate(
    slope = mod$coefficients[2],
    intercept = mod$coefficients[1],
    pvalue=summary(mod)$coefficients[,4][2], 
    rsquared=summary(mod)$r.squared,
    tempchg = slope * 124, 
    centurychg = slope * 100, 
    decadechg = slope * 10,
    tempchg_c = tempchg / 1.8,
    decadechg_c = decadechg / 1.8,
    bin = cut(tempchg_c, c(-Inf, 0, 0.5, 1.0, 1.5, 2, Inf))
  )

modelAnnualTempChg <-
  . %>% 
  do(mod = lm(temp ~ year, data = .)) %>% 
  extractModelVariables()

We’ll demonstrate the model first on the data for the Lower 48:

climdiv_national <- read_csv("../data/processed/climdiv_national.csv")

model_national <- 
  climdiv_national %>% 
  group_by(state) %>% # our fancy dplyr function expects to receive groups
  modelAnnualTempChg() 

Our model’s temperature change estimate for the Lower 48 is 1.05ºC (1.88ºF).

We also want to analyze temperature change on a seasonal basis. To do this we classify the months into seasons. One tricky thing is that we want to consider December and then the following January and February as one contiguous season. To do this, we create a year_adj variable that treats January and February as part of the previous year.

# the spiciest function in this notebook
season <- . %>% 
  mutate(
    season = case_when(
      month %in% c("Dec", "Jan", "Feb") ~ "Winter",
      month %in% c("Mar", "Apr", "May") ~ "Spring",
      month %in% c("Jun", "Jul", "Aug") ~ "Summer",
      month %in% c("Sep", "Oct", "Nov") ~ "Fall"
    ),
    year_adj = ifelse(month %in% c("Jan", "Feb"), year - 1, year)
  )

modelSeasonalTempChg <-
  . %>% 
  do(mod = lm(temp ~ year_adj, data = .)) %>% 
  extractModelVariables()

Lets use these models to find the fastest warming states in the Lower 48.

climdiv_state <- read_csv("../data/processed/climdiv_state.csv")
climdiv_state_year <- read_csv("../data/processed/climdiv_state_year.csv")
statefips <- read_delim(
  "../data/raw/state.txt",
  "|",
  escape_double = FALSE,
  trim_ws = TRUE,
  col_types = cols(
    STATE = col_character(),
    STUSAB = col_character(),
    STATE_NAME = col_character(),
    STATENS = col_character()
  )
)

model_state <-
  climdiv_state_year %>% 
  group_by(fips) %>% 
  modelAnnualTempChg() %>% 
  left_join(statefips, by=c("fips"="STATE")) %>%
  arrange(desc(tempchg))

head(model_state, 10) %>% 
  mutate(State=STATE_NAME, `Temperature change`=paste0(fmt_c(tempchg_c), " (", fmt_f(tempchg), ")")) %>% 
  select(State, `Temperature change`) %>% 
  knitr::kable()
State Temperature change
Rhode Island 2.04ºC (3.67ºF)
New Jersey 1.98ºC (3.56ºF)
Connecticut 1.8ºC (3.24ºF)
Maine 1.74ºC (3.14ºF)
Massachusetts 1.7ºC (3.06ºF)
Delaware 1.66ºC (2.99ºF)
North Dakota 1.65ºC (2.98ºF)
New Hampshire 1.62ºC (2.92ºF)
Utah 1.58ºC (2.84ºF)
Michigan 1.58ºC (2.84ºF)

Next, we find the fastest warming states in winter.

model_state_seasonal_tidy <-
  season(climdiv_state) %>%
  group_by(fips, year_adj, season) %>% 
  summarise(temp = mean(temp)) %>% 
  mutate(temp_c = (temp - 32) / 1.8) %>%
  filter(year_adj <= 2019 & year_adj >= 1895) %>% 
  group_by(fips, season) %>% 
  modelSeasonalTempChg() %>% 
  dplyr::select(fips, season, tempchg_c)

model_state_seasonal_max <-
  model_state_seasonal_tidy %>% 
  group_by(fips) %>% 
  arrange(-tempchg_c) %>% 
  summarise(max_warming_season=head(season, 1))
## Warning: Grouping rowwise data frame strips rowwise nature
model_state_seasonal <-
  model_state_seasonal_tidy %>% 
  spread(season, tempchg_c) %>% 
  left_join(model_state_seasonal_max, by="fips") %>% 
  left_join(select(model_state, fips, tempchg_c), by="fips") %>% 
  left_join(statefips, by=c("fips"="STATE")) %>%
  rename(Annual=tempchg_c)

write_csv(model_state_seasonal, "../data/processed/model_state.csv")
  
head(model_state_seasonal, 10) %>% 
  mutate(State=STATE_NAME, `Winter temperature change`=paste0(fmt_c(Winter), " (", fmt_f(Winter * 1.8), ")")) %>% 
  select(State, `Winter temperature change`) %>% 
  knitr::kable()
State Winter temperature change
Alabama 0.459ºC (0.825ºF)
Arizona 1.388ºC (2.499ºF)
Arkansas 0.532ºC (0.958ºF)
California 1.412ºC (2.542ºF)
Colorado 1.839ºC (3.310ºF)
Connecticut 2.634ºC (4.741ºF)
Delaware 2.201ºC (3.962ºF)
Florida 1.300ºC (2.340ºF)
Georgia 1.109ºC (1.997ºF)
Idaho 1.410ºC (2.538ºF)

Now we’ll do the same thing, but on the county level.

climdiv_county <- read_csv("../data/processed/climdiv_county.csv")
## Parsed with column specification:
## cols(
##   year = col_integer(),
##   fips = col_character(),
##   month = col_character(),
##   temp = col_double()
## )
climdiv_county_year <- read_csv("../data/processed/climdiv_county_year.csv")
## Parsed with column specification:
## cols(
##   fips = col_character(),
##   year = col_integer(),
##   temp = col_double(),
##   tempc = col_double()
## )
model_county <-
  climdiv_county_year %>% 
  group_by(fips) %>% 
  modelAnnualTempChg() %>%
  left_join(census, by="fips")

model_county_seasonal_tidy <-
  season(climdiv_county) %>%
  group_by(fips, year_adj, season) %>% 
  summarise(temp = mean(temp)) %>% 
  filter(year_adj <= 2019 & year_adj >= 1895) %>% 
  group_by(fips, season) %>% 
  modelSeasonalTempChg() %>% 
  left_join(census, by="fips") %>% 
  dplyr::select(fips, season, tempchg_c)

model_county_seasonal_max <-
  model_county_seasonal_tidy %>% 
  group_by(fips) %>% 
  arrange(-tempchg_c) %>% 
  summarise(max_warming_season=head(season, 1))
## Warning: Grouping rowwise data frame strips rowwise nature
model_county_seasonal <-
  model_county_seasonal_tidy %>% 
  spread(season, tempchg_c) %>% 
  left_join(model_county_seasonal_max, by="fips") %>% 
  left_join(select(model_county, fips, tempchg_c), by="fips") %>% 
  left_join(countynames, by="fips") %>% 
  rename(Annual=tempchg_c)

write_csv(model_county_seasonal, "../data/processed/model_county.csv")

County analysis

To find the number of people living in a county that has warmed by 2ºC or more, we aggregate the counties based on the temperature change category.

# Population of the 3,107 counties in our analysis (Lower 48 + D.C.)
totalpop <- sum(model_county$POPESTIMATE2018)

group_by(model_county, bin) %>% 
  summarise(pop = sum(POPESTIMATE2018)) %>% 
  mutate(pct = percent(pop / totalpop)) %>% 
  knitr::kable()
## Warning: Grouping rowwise data frame strips rowwise nature
bin pop pct
(-Inf,0] 7044566 2.2%
(0,0.5] 53826711 16.6%
(0.5,1] 86198082 26.5%
(1,1.5] 70319381 21.6%
(1.5,2] 71295957 21.9%
(2, Inf] 36317672 11.2%

Join the county data to a shapefile so we can map it! You’ll need to download this shapefile from the Census to run this code. You can run make to automate the download process.

countyshp <- 
  st_read("../data/shapefiles/cb_2018_us_county_20m/cb_2018_us_county_20m.shp")
## Reading layer `cb_2018_us_county_20m' from data source `/Users/muyskensj/git/data-climate-change-usa/data/shapefiles/cb_2018_us_county_20m/cb_2018_us_county_20m.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3220 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
# ALAND is square meters
countyshp_data <-
  countyshp %>%
  mutate(fips=paste0(STATEFP, COUNTYFP)) %>%
  left_join(select(model_county, fips, tempchg, tempchg_c, POPESTIMATE2018), by="fips") %>%
  mutate(popdensity_land = POPESTIMATE2018 / ALAND)

# 113 county shapes without corresponding data. Note: The city of Lexington, VA is missing from the data. 51678
countyshp %>% 
  mutate(fips=paste0(STATEFP, COUNTYFP)) %>%
  anti_join(select(model_county, fips, tempchg), by="fips")# %>% View()
## Simple feature collection with 113 features and 10 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
##    STATEFP COUNTYFP COUNTYNS       AFFGEOID GEOID           NAME LSAD
## 1       15      005 01702380 0500000US15005 15005        Kalawao   06
## 2       72      145 01804553 0500000US72145 72145      Vega Baja   13
## 3       72      061 01804511 0500000US72061 72061       Guaynabo   13
## 4       72      121 01804541 0500000US72121 72121  Sabana Grande   13
## 5       72      137 01804549 0500000US72137 72137       Toa Baja   13
## 6       72      139 01804550 0500000US72139 72139  Trujillo Alto   13
## 7       72      151 01804556 0500000US72151 72151        Yabucoa   13
## 8       02      016 01419965 0500000US02016 02016 Aleutians West   05
## 9       72      075 01804518 0500000US72075 72075     Juana Díaz   13
## 10      72      087 01804524 0500000US72087 72087          Loíza   13
##          ALAND      AWATER  fips                       geometry
## 1     31057603   105764468 15005 MULTIPOLYGON (((-157.0143 2...
## 2    118777649    57795019 72145 MULTIPOLYGON (((-66.44899 1...
## 3     71444638      468304 72061 MULTIPOLYGON (((-66.12172 1...
## 4     94050248       25323 72121 MULTIPOLYGON (((-66.97085 1...
## 5     60201692    48075984 72137 MULTIPOLYGON (((-66.25794 1...
## 6     53786133     1592599 72139 MULTIPOLYGON (((-66.02526 1...
## 7    143005179    72592521 72151 MULTIPOLYGON (((-66.01024 1...
## 8  11375510886 25186009907 02016 MULTIPOLYGON (((179.4813 51...
## 9    156117101   121328720 72075 MULTIPOLYGON (((-66.54722 1...
## 10    50147915   119761911 72087 MULTIPOLYGON (((-65.99079 1...
# No data without a county shape
select(model_county, fips, tempchg) %>% 
  anti_join(mutate(countyshp, fips=paste0(STATEFP, COUNTYFP)), by="fips")# %>% View()
## Source: local data frame [0 x 2]
## Groups: <by row>
## 
## # A tibble: 0 x 2
## # … with 2 variables: fips <chr>, tempchg <dbl>
st_write(countyshp_data, "../data/processed/model_county.geojson", delete_dsn = TRUE)
## Deleting source `../data/processed/model_county.geojson' using driver `GeoJSON'
## Writing layer `model_county' to data source `../data/processed/model_county.geojson' using driver `GeoJSON'
## features:       3220
## fields:         14
## geometry type:  Multi Polygon