This workbook scrapes, cleans, and analyzes electric demand data from the CA-ISO. Electricity consumption has recently received attention as a high-frequency alternative measure of macroeconomic activity. For example, nationwide energy consumption is one of several inputs to the Federal Reserve Bank of New York's Weekly Economic Index. The goal of this analysis is to evaluate electricity consumption as a proxy for output, particularly in the context of the pandemic-induced recession. This script is written to query all necessary data, but will use local data where available.
Code is published at: [https://github.com/noahkouchekinia/CAISO](https://github.com/noahkouchekinia/CAISO)
#Set working directory
setwd("/home/noah/Documents/Code Samples/CAISO")
#Load Necessary Packages
library(tidyverse)
library(lubridate)
library(ggplot2)
library(tis)
library(jsonlite)
library(tigris)
library(zoo)
library(stargazer)
#A function to take the difference between UK and CA time on any given day.
#Later, an API call will require times in GMT+0, rather than local time.
#The offset is not a constant 8 hours, because of different daylight saving schedules.
tz_offset <- function(date, tz_a = "America/Los_Angeles", tz_b = "Europe/London"){
  a_time <- date %>%
    as.character() %>%
    paste("12:00:00") %>%
    as.POSIXct(tz_a) %>%
    with_tz(tz = tz_b) %>%
    hour()
  b_time <- date %>%
    as.character() %>%
    paste("12:00:00") %>%
    as.POSIXct(tz_a) %>%
    hour()
  offset <- a_time - b_time
  return(offset)
}
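#Example usage (illustrative dates): the offset is normally 8 hours, but drops to 7 in the
#weeks when the US has already switched to daylight saving time and the UK has not.
tz_offset(as.Date("2020-07-01")) #8
tz_offset(as.Date("2020-03-15")) #7 (US DST began 2020-03-08; UK summer time began 2020-03-29)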
#Colors
col1 <- "#2e5e8b"
col2 <- "#8baf3e"
col3 <- "#fcc62d"
col4 <- "#88cde5"
col5 <- "#474747"
#Get BOD Theme
# source("S:/Monroe/ERcharts/r_backend/function.R")
Here, I query demand data from the California Independent System Operator (CAISO). Their OASIS API allows me to pull hour-by-hour demand. However, querying this API is challenging: documentation is limited, and the API caps the amount of data returned per request, requiring many repeated pulls.
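For reference, a single assembled request for one thirty-day window of actual load looks like the URL below (the dates and hour offset are illustrative; the code that follows builds these components programmatically):

http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_FCST&market_run_id=ACTUAL&resultformat=6&startdatetime=20200101T08:00-0000&enddatetime=20200131T08:00-0000&version=1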
#Set query parameters
start_date     <- as.Date("2011-06-01") #Set to after PGE structural break
end_date       <- Sys.Date()
market_process <- "ACTUAL" #Select from: 2DA, 7DA, DAM, ACTUAL, RTM

#Check for data downloaded in previous scrapes, adjust scraping start date accordingly
extant_data <- read.csv("./Data/Actual Hourly Integrated Load by Region.csv")
extant_data$X <- NULL
extant_data$OPR_DT <- as.Date(extant_data$OPR_DT)

n_start_date <- max(start_date, max(extant_data$OPR_DT - 1, na.rm = TRUE))
#Prepare dates used to loop through portions of days (30 at a time)
#Create Vector of start and end dates
all_dates   <- as.Date(n_start_date:end_date, origin = "1970-01-01") #all dates desired
start_dates <- c(n_start_date, all_dates[1:length(all_dates) %% 30 == 0]) #start dates every 30 days
end_dates   <- unique(c(start_dates[2:length(start_dates)], end_date)) #offset start dates to get end dates
if(length(start_dates) == 1){end_dates <- end_date} #needed in case of only one scraping period

#Get correct dates to include tz offset
start_offset <- tz_offset(start_dates)
end_offset   <- tz_offset(end_dates)

#Create list the length of start_dates, in order to hold query results
data_list <- vector("list", length(start_dates))

#Set other query parameters
q_base    <- "http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_FCST"
q_process <- paste0("market_run_id=", market_process)
q_format  <- "resultformat=6"
q_v       <- "version=1"
#Pull one thirty day period at a time
for(i in 1:length(start_dates)){
  #Set variable query components
  q_start_date <- paste0("startdatetime=", format(start_dates[i], "%Y%m%d"), "T0", start_offset[i], ":00-0000")
  q_end_date   <- paste0("enddatetime=",   format(end_dates[i],   "%Y%m%d"), "T0", end_offset[i],   ":00-0000")
  #Assemble query
  query <- paste(q_base, q_process, q_format, q_start_date, q_end_date, q_v, sep = "&") #Query URL for the OASIS API
  #Download query
  tf    <- tempfile()                        #create temporary destination file
  download.file(query, tf)                   #download zip file to tempfile
  fname <- unzip(tf, list = TRUE)$Name[1]    #get name of the file inside the zip archive
  data_list[[i]] <- read.csv(unz(tf, fname)) #read delimited data from that file
  Sys.sleep(5) #Pause, so as not to send overlapping queries
}
#Clean and export choice cuts of data
caiso_df <- data_list[unlist(lapply(data_list, ncol)) == 14] %>%
  bind_rows() %>%
  select(c("OPR_DT","OPR_HR","TAC_AREA_NAME","LABEL","POS","MW")) %>%
  mutate(OPR_DT = as.Date(OPR_DT),
         TAC_AREA_NAME = as.character(TAC_AREA_NAME)) %>%
  bind_rows(extant_data) %>%
  group_by(OPR_DT, OPR_HR, TAC_AREA_NAME) %>%
  summarize(LABEL = last(LABEL), POS = last(POS), MW = last(MW)) %>%
  mutate(date = OPR_DT, hour_f = factor(OPR_HR))
## `summarise()` regrouping output by 'OPR_DT', 'OPR_HR' (override with `.groups` argument)
#Save Data
write.csv(caiso_df, "./Data/Actual Hourly Integrated Load by Region.csv", row.names = FALSE)
rm(list = c("market_process", "extant_data","all_dates","end_dates","end_offset","fname","i","n_start_date",
"q_base", "q_end_date", "q_format", "q_process", "q_start_date","q_v","query","start_dates",
"start_offset", "tf"))
As I will soon demonstrate, electric demand data is extremely volatile and depends on things unrelated to economic output. Perhaps the largest contributor is weather; HVAC systems use a lot of energy. NOAA releases "degree day" data, which measure how much the temperature in a region deviates from room temperature, weighted by population. These degree day series are used by utilities to model and predict electricity consumption. We will use them to correct our electricity demand series for weather.
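To fix ideas, degree days are conventionally computed against a 65°F base; the sketch below illustrates the concept for a single location with made-up temperatures (the CPC series used here are population-weighted state aggregates, so this is not their exact computation).

#Degree-day concept: how far the daily mean temperature falls below/above a 65 F base
daily_mean_temp <- c(48, 60, 65, 72, 88)          #hypothetical daily mean temperatures (F)
heating_dd      <- pmax(65 - daily_mean_temp, 0)  #17, 5, 0, 0, 0
cooling_dd      <- pmax(daily_mean_temp - 65, 0)  #0, 0, 0, 7, 23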
#Set Query Parameters
url_start  <- "https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/"
years      <- year(start_date):year(end_date)
categories <- c("/StatesCONUS.Cooling", "/StatesCONUS.Heating")
extension  <- ".txt"

#Create list to hold results
heat_tables <- cool_tables <- vector("list", length = length(years))

#Pull data year by year and category by category (heating and cooling degree days) with loops
#Double looping is not great, but iterations are low here.
i <- 1
for(year in years){
  for(category in categories){
    api_call <- paste0(url_start, year, category, extension)
    table    <- read.table(api_call, sep = "|", skip = 3, stringsAsFactors = FALSE)
    table    <- t(table)
    colnames(table)    <- table[1,]
    colnames(table)[1] <- "date"
    table <- table[-1,]
    table <- as.data.frame(table, stringsAsFactors = FALSE)
    if(grepl("Heat", category)) heat_tables[[i]] <- table
    if(grepl("Cool", category)) cool_tables[[i]] <- table
  }
  i <- i + 1
}
#Combine and Clean results
<- bind_rows(heat_tables)%>%
heat_data select(date,CA)%>%
rename(heat = CA)
<- bind_rows(cool_tables)%>%
cool_data select(date,CA)%>%
rename(cool = CA)
<- full_join(heat_data, cool_data)%>%
degree_days mutate(date = as.Date(date,format = "%Y%m%d"),
heat = as.numeric(heat),
cool = as.numeric(cool))
## Joining, by = "date"
#Conclude
write.csv(degree_days, "./Data/degree_days.csv", row.names = FALSE)
California has a sizable and increasing amount of rooftop solar. If residents use rooftop solar to offset consumption of electricity from the grid, the trend and cyclical components of rooftop solar generation need to be removed from our electricity demand series.
Perhaps the crudest way to get at solar generation capacity is the number of daylight hours per day, which is cyclical throughout the year.
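For orientation, a single call to the sunrise-sunset.org endpoint looks like the sketch below (the date is illustrative; with formatted=0, times come back as ISO 8601 UTC strings and day_length is reported in seconds, which is why the series is later converted with as.numeric()).

#Illustrative single call; the loop below repeats this for every date in the sample
one_day <- fromJSON("https://api.sunrise-sunset.org/json?lat=35.463595&lng=-119.325359&date=2020-06-21&formatted=0")
one_day$results$sunrise    #sunrise time (ISO 8601, UTC)
one_day$results$day_length #day length in seconds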
#I need to pick a single point to query to represent California. Let's use the population centroid.
#Source: https://www2.census.gov/geo/docs/reference/cenpop2010/CenPop2010_Mean_ST.txt
ca_pop_centroid <- c(+35.463595, -119.325359)

#Check for extant data, let's not query data we already have.
extant_exist <- file.exists("./Data/daylight.csv")
if(extant_exist){
  extant_data  <- read.csv(file = "./Data/daylight.csv")
  extant_dates <- as.Date(extant_data$date)
  date_range   <- as.Date(start_date:end_date, origin = "1970-01-01")
  query_dates  <- date_range[!date_range %in% extant_dates]
}
if(!extant_exist){
  query_dates <- as.Date(start_date:end_date, origin = "1970-01-01")
}
#Query API if necessary
query_needed <- length(query_dates) > 0
if(query_needed){
  #generate api calls
  api_calls <- paste0("https://api.sunrise-sunset.org/json?",
                      "lat=",  ca_pop_centroid[1],
                      "&lng=", ca_pop_centroid[2],
                      "&date=", query_dates,
                      "&formatted=0")
  #Set vectors to store query results
  sunrise    <- character(length = length(api_calls))
  sunset     <- character(length = length(api_calls))
  day_length <- character(length = length(api_calls))

  #Loop through api queries
  for(i in 1:length(api_calls)){
    results       <- fromJSON(url(api_calls[i]))[[1]]
    sunrise[i]    <- results$sunrise[1]
    sunset[i]     <- results$sunset[1]
    day_length[i] <- results$day_length[1]
  }
  #create data.frame
  daylight <- data.frame(date = query_dates, sunrise, sunset, day_length)
  #combine with old data, if necessary
  if(extant_exist){
    daylight <- rbind(daylight, extant_data)
  }
  #write for future use
  write.csv(daylight, file = "./Data/daylight.csv", row.names = FALSE)
}
if(!query_needed){
  daylight <- extant_data
}
#Encode date column
daylight$date <- as.Date(daylight$date)
This portion is a work in progress.
#Get California Geographic boundaries
#I am ignoring islands; they are not in CA-ISO anyway.
us_shape     <- tigris::states()
ca_geo       <- as.data.frame(us_shape[us_shape$STUSPS == "CA","geometry"][[1]][[1]][[1]][[1]])
ca_geo_query <- paste(paste(ca_geo$V1, ca_geo$V2, sep = "+"), collapse = "%")

#Set up other components of query
api_key          = 'NOlwzKodWvBC5ZmHixTB1elMbiOiEZ8fNKDocxac'
wkt              = paste0("POLYGON(", ca_geo_query, ")")
attributes       = "dhi,dni,ghi"
leap_year        = 'true'
interval         = '60'
utc              = 'false'
your_name        = 'Noah+Kouchekinia'
reason_for_use   = 'electric+demand+correcting'
your_affiliation = 'FRBSF'
your_email       = 'n.a.kouchekinia@sf.frb.org'
mailing_list     = 'false'

#Build Query
url = paste0("https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?",
             "wkt=POLYGON(", ca_geo_query, ")&",
             "names=2020&",
             "leap_day=", leap_year, "&",
             "interval=", interval, "&",
             "utc=", utc, "&",
             "full_name=", your_name, "&",
             "email=", your_email, "&",
             "affiliation=", your_affiliation, "&",
             "mailing_list=", mailing_list, "&",
             "reason=", reason_for_use, "&",
             "api_key=", api_key, "&",
             "attributes=", attributes)
radiation_json <- read_json(url)
This portion is a work in progress.
#placeholder
Electrical usage also varies with work schedules. We need day-of-the-week and holiday dummies to account for this variation.
#Get Federal Holidays Using the tis package
holidays <- year(start_date):year(end_date) %>%
  holidays() %>%
  data.frame(date = ., holiday = names(.)) %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

#Get Easter using tis package
easter <- year(start_date):year(end_date) %>%
  easter() %>%
  data.frame(date = ., holiday = "easter") %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

#Get day of week effects
day_of_week <- as.Date(start_date:end_date, origin = "1970-01-01") %>%
  as_tibble() %>%
  mutate(date = value,
         day_of_week = wday(date, label = TRUE)) %>%
  select(!value)

#Combine
day_dummies <- bind_rows(holidays, easter) %>%
  full_join(day_of_week) %>%
  mutate(holiday = replace(holiday, is.na(holiday), "none"),
         holiday = factor(holiday, levels = c("none", unique(holiday)[unique(holiday) != "none"])),
         day_of_week = factor(day_of_week))
## Joining, by = "date"
#Conclude
Most standard seasonal adjustment routines do not work on daily data. Instead, I regress daily demand on the weather, holiday, day-of-week, and daylight controls assembled above, and treat the residual (plus the mean fitted value) as a weather- and calendar-corrected series.
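In equation form (my notation; this simply restates the lm call and correction step below, with $a$ indexing TAC areas and $t$ indexing days):

$$\mathrm{MW}_{a,t} = \alpha_a + \beta_a\,\mathrm{heat}_t + \gamma_a\,\mathrm{cool}_t + \boldsymbol{\delta}_a'\,\mathrm{holiday}_t + \boldsymbol{\theta}_a'\,\mathrm{dow}_t + \lambda_a\,\mathrm{daylength}_t + \varepsilon_{a,t}$$

$$\mathrm{MW}^{\mathrm{corrected}}_{a,t} = \mathrm{MW}_{a,t} - \widehat{\mathrm{MW}}_{a,t} + \overline{\widehat{\mathrm{MW}}}$$

where holiday and dow are vectors of dummies, and corrected values are then summed across areas within each day to produce the statewide series.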
#Extract TAC*Date Panel from CAISO Data
electric_tac_hourly <- caiso_df %>%
  filter(!(TAC_AREA_NAME == "CA ISO-TAC") & grepl("-TAC", TAC_AREA_NAME)) %>% #Keep only TAC-level (disaggregated) data
  mutate(TAC_AREA_NAME = replace(TAC_AREA_NAME, TAC_AREA_NAME == "MWD-TAC", "SCE-TAC")) %>% #join recently split MWD back to SCE
  filter(!TAC_AREA_NAME == "VEA-TAC") %>% #Drop VEA, which is mostly in Nevada
  group_by(date, OPR_HR, TAC_AREA_NAME) %>%
  summarize(MW = sum(MW)) #Add together MWD & SCE
## `summarise()` regrouping output by 'date', 'OPR_HR' (override with `.groups` argument)
electric_tac_daily <- electric_tac_hourly %>%
  group_by(date, TAC_AREA_NAME) %>%
  summarize(MW = sum(MW), hours = n()) %>% #Add together hours for daily series
  filter(hours == 24) %>% #Check that days are complete
  select(!hours)
## `summarise()` regrouping output by 'date' (override with `.groups` argument)
#Assemble all data for model
daily_model_df <- electric_tac_daily %>%
  left_join(degree_days) %>%
  left_join(day_dummies) %>%
  left_join(daylight) %>%
  filter(date >= as.Date(start_date) & date <= as.Date(end_date))
## Joining, by = "date"
## Joining, by = "date"
## Joining, by = "date"
#Model the daily series
#Here, we want to retain day-to-day variation, but correct for weather and holidays
daily_model <- lm(MW ~ TAC_AREA_NAME +
                    TAC_AREA_NAME*heat +
                    TAC_AREA_NAME*cool +
                    TAC_AREA_NAME*holiday +
                    TAC_AREA_NAME*day_of_week +
                    TAC_AREA_NAME*as.numeric(day_length),
                  data = daily_model_df,
                  na.action = na.exclude)
stargazer(daily_model, type = "html")
Dependent variable: MW

| Term | Estimate | Std. Error |
|------|---------:|-----------:|
| TAC_AREA_NAMESCE-TAC | 48,938.360*** | 3,038.912 |
| TAC_AREA_NAMESDGE-TAC | -160,638.800*** | 3,038.912 |
| heat | 315.366*** | 44.044 |
| cool | 6,357.870*** | 60.584 |
| holidayMLKing | 462.924 | 3,751.402 |
| holidayGWBirthday | -12,735.310*** | 3,748.219 |
| holidayMemorial | -15,173.600*** | 3,955.890 |
| holidayIndependence | -15,634.220*** | 4,157.497 |
| holidayLabor | -17,905.700*** | 3,757.980 |
| holidayColumbus | -5,244.748 | 3,755.142 |
| holidayVeterans | 330.730 | 3,921.827 |
| holidayThanksgiving | -27,857.300*** | 3,748.646 |
| holidayChristmas | -19,382.780*** | 3,726.673 |
| holidayNewYears | -18,656.420*** | 3,728.516 |
| holidayeaster | -17,639.120*** | 3,946.800 |
| day_of_week.L | 3,707.617*** | 527.653 |
| day_of_week.Q | -25,774.480*** | 524.275 |
| day_of_week.C | 2,628.248*** | 526.797 |
| day_of_week4 | -8,011.188*** | 528.989 |
| day_of_week5 | 893.204* | 525.747 |
| day_of_week6 | -819.718 | 521.279 |
| as.numeric(day_length) | 0.773*** | 0.046 |
| TAC_AREA_NAMESCE-TAC:heat | -809.506*** | 62.288 |
| TAC_AREA_NAMESDGE-TAC:heat | -379.974*** | 62.288 |
| TAC_AREA_NAMESCE-TAC:cool | 2,656.735*** | 85.679 |
| TAC_AREA_NAMESDGE-TAC:cool | -4,826.801*** | 85.679 |
| TAC_AREA_NAMESCE-TAC:holidayMLKing | -9,008.110* | 5,305.284 |
| TAC_AREA_NAMESDGE-TAC:holidayMLKing | -2,695.387 | 5,305.284 |
| TAC_AREA_NAMESCE-TAC:holidayGWBirthday | -3,567.764 | 5,300.782 |
| TAC_AREA_NAMESDGE-TAC:holidayGWBirthday | 9,969.429* | 5,300.782 |
| TAC_AREA_NAMESCE-TAC:holidayMemorial | -20,311.600*** | 5,594.473 |
| TAC_AREA_NAMESDGE-TAC:holidayMemorial | 10,558.490* | 5,594.473 |
| TAC_AREA_NAMESCE-TAC:holidayIndependence | -23,615.120*** | 5,879.588 |
| TAC_AREA_NAMESDGE-TAC:holidayIndependence | 7,886.702 | 5,879.588 |
| TAC_AREA_NAMESCE-TAC:holidayLabor | -11,581.610** | 5,314.587 |
| TAC_AREA_NAMESDGE-TAC:holidayLabor | 14,005.530*** | 5,314.587 |
| TAC_AREA_NAMESCE-TAC:holidayColumbus | 10,064.550* | 5,310.572 |
| TAC_AREA_NAMESDGE-TAC:holidayColumbus | 6,681.239 | 5,310.572 |
| TAC_AREA_NAMESCE-TAC:holidayVeterans | -5,382.590 | 5,546.300 |
| TAC_AREA_NAMESDGE-TAC:holidayVeterans | -2,461.125 | 5,546.300 |
| TAC_AREA_NAMESCE-TAC:holidayThanksgiving | -8,378.609 | 5,301.386 |
| TAC_AREA_NAMESDGE-TAC:holidayThanksgiving | 22,268.250*** | 5,301.386 |
| TAC_AREA_NAMESCE-TAC:holidayChristmas | -14,246.850*** | 5,270.311 |
| TAC_AREA_NAMESDGE-TAC:holidayChristmas | 14,171.950*** | 5,270.311 |
| TAC_AREA_NAMESCE-TAC:holidayNewYears | -8,825.066* | 5,272.919 |
| TAC_AREA_NAMESDGE-TAC:holidayNewYears | 14,656.740*** | 5,272.919 |
| TAC_AREA_NAMESCE-TAC:holidayeaster | 8,321.789 | 5,581.618 |
| TAC_AREA_NAMESDGE-TAC:holidayeaster | 16,379.150*** | 5,581.618 |
| TAC_AREA_NAMESCE-TAC:day_of_week.L | 1,645.484** | 746.213 |
| TAC_AREA_NAMESDGE-TAC:day_of_week.L | -3,285.199*** | 746.213 |
| TAC_AREA_NAMESCE-TAC:day_of_week.Q | -8,470.280*** | 741.437 |
| TAC_AREA_NAMESDGE-TAC:day_of_week.Q | 20,252.040*** | 741.437 |
| TAC_AREA_NAMESCE-TAC:day_of_week.C | 743.765 | 745.003 |
| TAC_AREA_NAMESDGE-TAC:day_of_week.C | -2,061.820*** | 745.003 |
| TAC_AREA_NAMESCE-TAC:day_of_week4 | -3,488.503*** | 748.103 |
| TAC_AREA_NAMESDGE-TAC:day_of_week4 | 5,980.592*** | 748.103 |
| TAC_AREA_NAMESCE-TAC:day_of_week5 | 601.044 | 743.519 |
| TAC_AREA_NAMESDGE-TAC:day_of_week5 | -521.052 | 743.519 |
| TAC_AREA_NAMESCE-TAC:day_of_week6 | -152.276 | 737.200 |
| TAC_AREA_NAMESDGE-TAC:day_of_week6 | 589.508 | 737.200 |
| TAC_AREA_NAMESCE-TAC:as.numeric(day_length) | -1.169*** | 0.065 |
| TAC_AREA_NAMESDGE-TAC:as.numeric(day_length) | -1.185*** | 0.065 |
| Constant | 230,821.100*** | 2,148.835 |
| Observations | 10,647 | |
| R2 | 0.989 | |
| Adjusted R2 | 0.989 | |
| Residual Std. Error | 11,708.080 (df = 10584) | |
| F Statistic | 15,780.840*** (df = 62; 10584) | |

Note: \*p<0.1; \*\*p<0.05; \*\*\*p<0.01
daily_model_df <- daily_model_df %>%
  ungroup() %>%
  mutate(MW_predicted = predict(daily_model),
         MW_corrected = (MW - MW_predicted) + mean(MW_predicted, na.rm = TRUE)) %>%
  group_by(date) %>%
  summarize(MW_predicted = sum(MW_predicted), MW = sum(MW), MW_corrected = sum(MW_corrected))
## `summarise()` ungrouping output (override with `.groups` argument)
# plot(daily_model_df$date, daily_model_df$MW, type = "l")
# lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
# lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")
There is some remaining, mostly random, day-to-day variation we are not so interested in. Let's remove this with a fourteen-day moving average.
daily_model_df$MW_corrected_MA7 <- rollapply(daily_model_df$MW_corrected,
                                             width = 14,
                                             fill = NA,
                                             FUN = mean,
                                             na.rm = TRUE)
# plot(daily_model_df$date, daily_model_df$MW, type = "l")
# lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
# lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")
# lines(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")
#
# plot(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")
#Modify Data for plot
fig_1_df <- daily_model_df %>%
  select(date, MW_corrected, MW_corrected_MA7) %>%
  pivot_longer(-date) %>%
  group_by(name) %>%
  mutate(mean = mean(value[year(date) == 2019], na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(percent_of_mean = (value/mean)*100)

#Set Annotation Points
ca_sip <- as.Date("2020-03-19")

#Plot Using ggplot
fig_1 <- ggplot(data = fig_1_df)+
  geom_vline(xintercept = ca_sip, col = "red")+
  geom_line(aes(x = date, y = percent_of_mean, col = name), size = 1.25)+
  scale_x_date(limits = c(as.Date("2020-01-01"), end_date))+
  scale_y_continuous(limits = c(85, 110), labels = function(x){paste0(x-100,"%")}, name = "deviation from mean")+
  scale_color_manual(labels = c("Weather/Weekday Corrected","14 day MA of corrected"),
                     values = c(col4, col1))+
  labs(title = "Recent Change in California Electric Demand",
       caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO")+
  theme_minimal()+ ##CHANGE TO BOD THEME
  theme(axis.title.y = element_text(), axis.title.x = element_blank())+
  theme(legend.position = "top", legend.direction = "horizontal", legend.title = element_blank())

#Save
ggsave(fig_1, file = "./Figures/EL_Fig_1_Long_Demand_Series.png", width = 12, height = 8)
## Warning: Removed 6246 row(s) containing missing values (geom_path).
#Display
fig_1
## Warning: Removed 6246 row(s) containing missing values (geom_path).
#Adjust data for plot
fig_2_df <- daily_model_df %>%
  select(date, MW, MW_corrected, MW_corrected_MA7) %>%
  pivot_longer(-date)

#Plot with ggplot
fig_2 <- ggplot(data = fig_2_df)+
  geom_vline(xintercept = ca_sip, col = "red")+
  geom_line(aes(x = date, y = value/1000, col = name))+
  labs(title = "California Electric Usage",
       caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO",
       y = "Gigawatts")+
  scale_color_manual(labels = c("Raw Demand","Weather/Weekday Corrected","14 day MA of corrected"),
                     values = c(col3, col4, col1))+
  theme_minimal()+ ##CHANGE TO BOD THEME
  theme(axis.title.y = element_text(), axis.title.x = element_blank())+
  theme(legend.position = "top", legend.direction = "horizontal", legend.title = element_blank())

#Save
ggsave(fig_2, file = "./Figures/EL_Fig_2_Long_Demand_Series.png", width = 12, height = 8)
## Warning: Removed 14 row(s) containing missing values (geom_path).
#Display
fig_2
## Warning: Removed 14 row(s) containing missing values (geom_path).
#Structure data for plot
fig_3_area_df <- daily_model_df %>%
  select(date, MW_corrected) %>%
  mutate(year = as.character(year(date))) %>%
  filter(year %in% as.character(2014:2019)) %>%
  mutate(month = month(date),
         day = day(date)) %>%
  mutate(date = as.Date(paste0("2020-", month, "-", day))) %>%
  select(date, year, MW_corrected) %>%
  group_by(date) %>%
  arrange(year) %>%
  summarize(min = min(MW_corrected), max = max(MW_corrected)) %>%
  mutate(min = rollapply(min, width = 14, fill = NA, FUN = mean, na.rm = TRUE)) %>%
  mutate(max = rollapply(max, width = 14, fill = NA, FUN = mean, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
fig_3_line_df <- daily_model_df %>%
  filter(year(date) == 2020)
#Plot
fig_3 <- ggplot(data = fig_3_area_df)+
  geom_vline(xintercept = ca_sip, col = "red")+
  geom_ribbon(data = fig_3_area_df, aes(x = date, ymin = min/1000, ymax = max/1000, fill = "Range of previous 5 years"), col = NA)+
  geom_line(data = fig_3_line_df, aes(x = date, y = MW_corrected_MA7/1000, color = "2020"), size = 1.25)+
  labs(title = "CA ISO Electricity Usage: Comparing Recent Years",
       y = "Gigawatts",
       caption = "Note: Date of statewide shelter in place order shown in red.\nNote: Series are 14 day moving averages of weather/weekday corrected electric demand.\nSource: California ISO")+
  scale_x_date(limits = c(as.Date("2020-01-07"), as.Date("2020-12-24")))+
  theme_minimal()+
  theme(axis.title.y = element_text(), axis.title.x = element_blank())+
  theme(legend.position = "top", legend.direction = "horizontal", legend.title = element_blank())+
  scale_color_manual(label = "2020", values = col1)+
  scale_fill_manual(label = "Range of previous 5 years", values = col4)

#Save File
ggsave(fig_3, file = "./Figures/EL_Fig_3_Adjusted_Demand_Over_Recent_Years.png",
       width = 12, height = 8)
## Warning: Removed 13 row(s) containing missing values (geom_path).
#Display
fig_3
## Warning: Removed 13 row(s) containing missing values (geom_path).
Perhaps the pandemic economy has different patterns of electricity consumption throughout the day, beyond the overall dip in usage. Let's look at electric usage by hour of the day, month by month.
fig_5_df <- electric_tac_hourly %>%
  mutate(year = as.character(year(date)),
         month = month(date, label = TRUE)) %>%
  filter(year %in% c(2019, 2020)) %>%
  group_by(year, month, OPR_HR, TAC_AREA_NAME) %>%
  summarize(MW = mean(MW, na.rm = TRUE)) %>%
  group_by(year, month, OPR_HR) %>%
  summarize(MW = sum(MW))
## `summarise()` regrouping output by 'year', 'month', 'OPR_HR' (override with `.groups` argument)
## `summarise()` regrouping output by 'year', 'month' (override with `.groups` argument)
ggplot(data = fig_5_df, aes(x = OPR_HR, y = MW, col = year, group = year))+
geom_line()+
facet_wrap(~ month, ncol = 4)+
theme_minimal()