This workbook scrapes, cleans, and analyzes electric demand data from the CA-ISO. Electricity consumption has recently received attention as a high-frequency alternative measure of macroeconomic activity. For example, nationwide energy consumption is one of several inputs to the Federal Reserve Bank of New York's Weekly Economic Index. The goal of this analysis is to evaluate electricity consumption as a proxy for output, particularly in the context of the pandemic-induced recession. This script is written to query all of the data it needs, but will use local data where available.
Code is published at: [https://github.com/noahkouchekinia/CAISO](https://github.com/noahkouchekinia/CAISO)
#Set working directory
setwd("/home/noah/Documents/Code Samples/CAISO")
#Load Necessary Packages
library(tidyverse)
library(lubridate)
library(ggplot2)
library(tis)
library(jsonlite)
library(tigris)
library(zoo)
library(stargazer)

#A function to compute the offset between California and UK time on any given day.
#Later, an API call will require times in GMT+0, rather than local time.
#The offset is not a constant 8 hours, because the US and UK follow different daylight saving schedules.
tz_offset <- function(date, tz_a = "America/Los_Angeles", tz_b = "Europe/London"){
  a_time <- date %>%
    as.character() %>%
    paste("12:00:00") %>%
    as.POSIXct(tz_a) %>%
    with_tz(tz = tz_b) %>%
    hour()
  b_time <- date %>%
    as.character() %>%
    paste("12:00:00") %>%
    as.POSIXct(tz_a) %>%
    hour()
  offset <- a_time - b_time
  return(offset)
}
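As a quick sanity check of the offset (values are for illustration and assume the 2020 daylight saving calendars):
tz_offset(as.Date("2020-01-15")) #8: both regions on standard time
tz_offset(as.Date("2020-03-15")) #7: US DST has begun, UK summer time has not
tz_offset(as.Date("2020-07-15")) #8: both regions on daylight saving time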
#Colors
col1 <- "#2e5e8b"
col2 <- "#8baf3e"
col3 <- "#fcc62d"
col4 <- "#88cde5"
col5 <- "#474747"
#Get BOD Theme
# source("S:/Monroe/ERcharts/r_backend/function.R") Here, I query from the California Independent System Operator’s (ISO). Their OASIS API allowed me to pull hour by hour demand. However, querying from this API is challenging. Documentation is limited. Furthermore, the API limits the amount of data that can be downloaded with each pull, requiring many repetitive pulls.
#Set query parameters
start_date <- as.Date("2011-06-01") #Set to after PGE structural break
end_date <- Sys.Date()
market_process <- "ACTUAL" #Select from: 2DA, 7DA, DAM, ACTUAL, RTM
#Check for data downloaded in previous scrapes, adjust scraping start date accordingly
extant_data <- read.csv("./Data/Actual Hourly Integrated Load by Region.csv")
extant_data$X <- NULL
extant_data$OPR_DT <- as.Date(extant_data$OPR_DT)
n_start_date <- max(start_date, max(extant_data$OPR_DT-1, na.rm = TRUE))
#Prepare dates used to loop through portions of days (30 at a time)
#Create Vector of start and end dates
all_dates <- as.Date(n_start_date:end_date,origin="1970-01-01") #all dates desired
start_dates <- c(n_start_date, all_dates[1:length(all_dates)%%30==0]) #start dates every 30 days
end_dates <- unique(c(start_dates[2:length(start_dates)],end_date)) #offset start dates to get end dates
if(length(start_dates)==1){end_dates <- end_date} #needed in case of only one scraping period
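To make the chunking concrete, here is what the chunk boundaries look like for a short hypothetical range (demo objects only, removed immediately):
demo_dates <- as.Date(as.Date("2020-01-01"):as.Date("2020-02-15"), origin = "1970-01-01")
c(as.Date("2020-01-01"), demo_dates[1:length(demo_dates)%%30==0]) #chunk starts: "2020-01-01" "2020-01-30"
unique(c(demo_dates[1:length(demo_dates)%%30==0], as.Date("2020-02-15"))) #chunk ends: "2020-01-30" "2020-02-15"
rm(demo_dates)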
#Get correct dates to include tz offset
start_offset <- tz_offset(start_dates)
end_offset <- tz_offset(end_dates)
#Create list the length of start_dates, in order to hold query results
data_list <- vector("list",length(start_dates))
#Set other query parameters
q_base <- "http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_FCST"
q_process <- paste0("market_run_id=",market_process)
q_format <- "resultformat=6"
q_v <- "version=1"
#Pull one thirty day period at a time
for(i in 1:length(start_dates)){
#Set variable query components
q_start_date <- paste0("startdatetime=",format(start_dates[i], "%Y%m%d"),"T0",start_offset[i],":00-0000")
q_end_date <- paste0("enddatetime=",format(end_dates[i], "%Y%m%d"),"T0", end_offset[i],":00-0000")
#Assemble query
query <- paste(q_base, q_process, q_format, q_start_date, q_end_date, q_v, sep="&") #Query URL for the OASIS API
#Download Query
tf = tempfile() #create temporary destination file
download.file(query,tf) #download file to tempfile
fname = unzip(tf,list=T)$Name[1] #Get the name of the file inside the zip archive
data_list[[i]] <- read.csv(unz(tf, fname)) #Read delimited data from file
Sys.sleep(5) #Pause, so as not to send overlapping queries
}
#Clean and export choice cuts of data
caiso_df <- data_list[unlist(lapply(data_list,ncol))==14] %>%
bind_rows()%>%
select(c("OPR_DT","OPR_HR","TAC_AREA_NAME","LABEL","POS","MW"))%>%
mutate(OPR_DT = as.Date(OPR_DT),
TAC_AREA_NAME = as.character(TAC_AREA_NAME))%>%
bind_rows(extant_data)%>%
group_by(OPR_DT,OPR_HR,TAC_AREA_NAME) %>%
summarize(LABEL = last(LABEL), POS = last(POS),MW = last(MW)) %>%
mutate(date = OPR_DT, hour_f = factor(OPR_HR))
#Save Data
write.csv(caiso_df, "./Data/Actual Hourly Integrated Load by Region.csv", row.names = FALSE)
rm(list = c("market_process", "extant_data","all_dates","end_dates","end_offset","fname","i","n_start_date",
"q_base", "q_end_date", "q_format", "q_process", "q_start_date","q_v","query","start_dates",
"start_offset", "tf"))As I will soon demonstrate, electrical demand data is extremely volatile, and depends on things unrelated to economic output. Perhaps the largest contributor is weather; HVAC systems use a lot of energy. The NOAA produces releases “degree day” data, which are a measure of how much the temperature in a region deviates from room temperature, weighted by population. These degree day series are used by utilities to model and predict electrical consumption. We will use them to correct our electrical output series for weather.
#Set Query Parameters
url_start <- "https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/"
years <- year(start_date):year(end_date)
categories <-c("/StatesCONUS.Cooling","/StatesCONUS.Heating")
extension <- ".txt"
#Create list to hold results
heat_tables <- cool_tables <-vector("list", length=length(years))
#Pull data year by year and category by category (heating and cooling degree days) with loops
#Double looping is not great, but iterations are low here.
i <- 1
for(year in years){
for(category in categories){
api_call <- paste0(url_start,year,category,extension)
table <- read.table(api_call, sep = "|", skip = 3, stringsAsFactors = FALSE)
table <- t(table)
colnames(table) <- table[1,]
colnames(table)[1] <- "date"
table <- table[-1,]
table <- as.data.frame(table, stringsAsFactors = FALSE)
if(grepl("Heat",category)) heat_tables[[i]] <- table
if(grepl("Cool",category)) cool_tables[[i]] <- table
}
i = i+1
}
#Combine and Clean results
heat_data <- bind_rows(heat_tables)%>%
select(date,CA)%>%
rename(heat = CA)
cool_data <- bind_rows(cool_tables)%>%
select(date,CA)%>%
rename(cool = CA)
degree_days <- full_join(heat_data, cool_data)%>%
mutate(date = as.Date(date,format = "%Y%m%d"),
heat = as.numeric(heat),
cool = as.numeric(cool))
#Conclude
write.csv(degree_days, "./Data/degree_days.csv", row.names = FALSE)

California has a sizable and growing amount of rooftop solar. If residents use rooftop solar to offset their consumption of grid electricity, the trend and cyclical components of rooftop solar generation need to be removed from our electricity demand series.
Perhaps the crudest way to get at solar generation capacity is the number of daylight hours per day, which cycles over the year.
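For reference, a single call to the sunrise-sunset.org endpoint looks like the sketch below. The extracted field names match the loop that follows; the response details (ISO 8601 times and a day_length reported in seconds when formatted=0) are my reading of the API documentation, so treat them as assumptions.
#Example of a single query (sketch only; not run as part of the scrape)
# example_call <- paste0("https://api.sunrise-sunset.org/json?",
#                        "lat=35.463595&lng=-119.325359&date=2020-06-21&formatted=0")
# example_results <- fromJSON(url(example_call))[[1]]
# example_results$sunrise    #sunrise time, ISO 8601, in UTC
# example_results$day_length #day length, reportedly in seconds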
#I need to pick a single point to query to represent California. Let's use the population centroid.
#Source: https://www2.census.gov/geo/docs/reference/cenpop2010/CenPop2010_Mean_ST.txt
ca_pop_centroid <- c(+35.463595,-119.325359)
#Check for extant data, let's not query data we already have.
extant_exist <- file.exists("./Data/daylight.csv")
if(extant_exist){
extant_data <- read.csv(file = "./Data/daylight.csv")
extant_dates <- as.Date(extant_data$date)
date_range <- as.Date(start_date:end_date, origin ="1970-01-01")
query_dates <- date_range[!date_range %in% extant_dates]
}
if(!extant_exist){
query_dates <- as.Date(start_date:end_date, origin ="1970-01-01")
}
#Query API if necessary
query_needed <- length(query_dates) > 0
if(query_needed){
#generate api calls
api_calls <- paste0("https://api.sunrise-sunset.org/json?",
"lat=", ca_pop_centroid[1],
"&lng=",ca_pop_centroid[2],
"&date=",query_dates,
"&formatted=0")
#Set vectors to store query results
sunrise <- character(length=length(api_calls))
sunset <- character(length=length(api_calls))
day_length <- character(length=length(api_calls))
#Loop through api queries
for(i in 1:length(api_calls)){
results <-fromJSON(url(api_calls[i]))[[1]]
sunrise[i] <-results$sunrise[1]
sunset[i] <-results$sunset[1]
day_length[i] <-results$day_length[1]
}
#create data.frame
daylight <- data.frame(date = query_dates, sunrise, sunset, day_length)
#combine with old data, if necessary
if(extant_exist){
daylight <- rbind(daylight, extant_data)
}
#write for future use
write.csv(daylight, file = "./Data/daylight.csv", row.names = FALSE)
}
if(!query_needed){
daylight <- extant_data
}
#Encode date column
daylight$date <- as.Date(daylight$date)

This portion is a work in progress.
#Get California Geographic boundaries
#I am ignoring islands; they are not in CA-ISO anyway.
us_shape <- tigris::states()
ca_geo <- as.data.frame(us_shape[us_shape$STUSPS == "CA","geometry"][[1]][[1]][[1]][[1]])
ca_geo_query <- paste(paste(ca_geo$V1,ca_geo$V2, sep = "+"),collapse = "%")
#Set up other components of query
api_key = 'NOlwzKodWvBC5ZmHixTB1elMbiOiEZ8fNKDocxac'
wkt = paste0("POLYGON(",ca_geo_query,")")
attributes = "dhi,dni,ghi"
leap_year = 'true'
interval = '60'
utc = 'false'
your_name = 'Noah+Kouchekinia'
reason_for_use = 'electric+demand+correcting'
your_affiliation = 'FRBSF'
your_email = 'n.a.kouchekinia@sf.frb.org'
mailing_list = 'false'
#Build Query
url = paste0("https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?",
"wkt=POLYGON(",ca_geo_query,")&",
"names=2020&",
"leap_day=",leap_year,"&",
"interval=",interval,"&",
"utc=",utc,"&",
"full_name=",your_name,"&",
"email=",your_email,"&",
"affiliation=",your_affiliation,"&",
"mailing_list=",mailing_list,"&",
"reason=",reason_for_use,"&",
"api_key=",api_key,"&",
"attributes=",attributes)
radiation_json <- read_json(url)

This portion is a work in progress.
#placeholder

Electrical usage also varies with work schedules. We need day-of-the-week and holiday dummies to account for this variation.
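For context, the tis holidays() helper returns a named numeric vector of yyyymmdd dates, and easter() similarly returns yyyymmdd dates (this is my understanding of the package, consistent with the as.character()/as.Date() conversion below). The names, such as NewYears and MLKing, become the holiday factor levels that show up in the regression output later.
#Inspect the tis helpers (illustrative; exact print format may differ)
holidays(2020) #named vector of yyyymmdd dates, e.g. NewYears = 20200101, MLKing = 20200120, ...
easter(2020) #20200412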
#Get Federal Holidays Using the tis package
holidays <- year(start_date):year(end_date)%>%
holidays()%>%
data.frame(date = ., holiday = names(.))%>%
mutate(date = as.Date(as.character(date), format = "%Y%m%d"))
#Get Easter using tis package
easter <- year(start_date):year(end_date)%>%
easter()%>%
data.frame(date = ., holiday = "easter")%>%
mutate(date = as.Date(as.character(date), format = "%Y%m%d"))
#Get day of week effects
day_of_week <- as.Date(start_date:end_date, origin = "1970-01-01") %>%
as_tibble() %>%
mutate(date = value,
day_of_week = wday(date, label = TRUE))%>%
select(!value)
#Combine
day_dummies <- bind_rows(holidays,easter) %>%
full_join(day_of_week)%>%
mutate(holiday = replace(holiday, is.na(holiday),"none"),
holiday = factor(holiday, levels = c("none",unique(holiday)[unique(holiday) != "none"])),
day_of_week = factor(day_of_week))
#Conclude

Most standard seasonal adjustment routines are built for monthly or quarterly data and do not work on daily series. Instead, I correct the daily demand series with a regression on the weather, holiday, day-of-week, and day-length controls assembled above.
#Extract TAC*Date Panel from CASIO Data
electric_tac_hourly <- caiso_df %>%
filter(!(TAC_AREA_NAME =="CA ISO-TAC") & grepl("-TAC",TAC_AREA_NAME)) %>% #Keep only TAC-level disaggregated data
mutate(TAC_AREA_NAME = replace(TAC_AREA_NAME,TAC_AREA_NAME == "MWD-TAC","SCE-TAC"))%>% #join recently split MWD back to SCE
filter(!TAC_AREA_NAME == "VEA-TAC")%>% #Drop VEA, which is mostly in Nevada
group_by(date, OPR_HR, TAC_AREA_NAME) %>%
summarize(MW = sum(MW)) #Add together MWD & SCE
electric_tac_daily <- electric_tac_hourly %>%
group_by(date, TAC_AREA_NAME) %>%
summarize(MW = sum(MW), hours = n()) %>% #Add together hours for daily series
filter(hours == 24) %>% #Check that days are complete
select(!hours)
#Assemble all data for model
daily_model_df <- electric_tac_daily %>%
left_join(degree_days) %>%
left_join(day_dummies) %>%
left_join(daylight) %>%
filter(date >= as.Date(start_date) & date <= as.Date(end_date))
#Model daily series
#Here, we want to retain day-to-day variation, but correct for weather, holidays, and day length
daily_model <- lm(MW ~ TAC_AREA_NAME +
TAC_AREA_NAME*heat+
TAC_AREA_NAME*cool+
TAC_AREA_NAME*holiday+
TAC_AREA_NAME*day_of_week+
TAC_AREA_NAME*as.numeric(day_length),
data = daily_model_df,
na.action = na.exclude)
stargazer(daily_model, type = "html")

[Stargazer regression table: MW regressed on TAC area dummies and their interactions with heating and cooling degree days, holiday dummies, day of week, and day length. Heating and cooling degree days, most holiday dummies, day-of-week terms, and day length are significant at the 1% level, as are many of their TAC-area interactions. Observations = 10,647; R2 = 0.989; Adjusted R2 = 0.989; Residual Std. Error = 11,708.080 (df = 10,584); F Statistic = 15,780.840 (df = 62; 10,584). Significance: *p<0.1; **p<0.05; ***p<0.01.]
daily_model_df <- daily_model_df %>%
ungroup()%>%
mutate(MW_predicted = predict(daily_model),
MW_corrected = (MW - MW_predicted) + mean(MW_predicted, na.rm = TRUE))%>%
group_by(date)%>%
summarize(MW_predicted = sum(MW_predicted), MW = sum(MW), MW_corrected = sum(MW_corrected))
# plot(daily_model_df$date, daily_model_df$MW, type = "l")
# lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
# lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")

There is some remaining, mostly random, day-to-day variation that we are not interested in. Let's remove it with a 14-day moving average.
daily_model_df$MW_corrected_MA7 <- rollapply(daily_model_df$MW_corrected,
width = 14,
fill = NA,
FUN = mean,
na.rm=TRUE)
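Note that rollapply() centers the window by default (align = "center"). A trailing, one-sided average, shown here only for comparison and not used below, would be:
# daily_model_df$MW_corrected_MA14_trailing <- rollapply(daily_model_df$MW_corrected,
#                                                        width = 14, fill = NA, align = "right",
#                                                        FUN = mean, na.rm = TRUE)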
# plot(daily_model_df$date, daily_model_df$MW, type = "l")
# lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
# lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")
# lines(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")
#
# plot(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")

#Modify Data for plot
fig_1_df <- daily_model_df %>%
select(date,MW_corrected,MW_corrected_MA7)%>%
pivot_longer(-date)%>%
group_by(name) %>%
mutate(mean = mean(value[year(date)==2019], na.rm = TRUE)) %>%
ungroup() %>%
mutate(percent_of_mean = (value/mean)*100)
#Set Annotation Points
ca_sip <- as.Date("2020-03-19")
#Plot Using GGplot
fig_1 <- ggplot(data = fig_1_df)+
geom_vline(xintercept = ca_sip, col = "red")+
geom_line(aes(x=date,y=percent_of_mean, col = name), size = 1.25)+
scale_x_date(limits = c(as.Date("2020-01-01"), end_date))+
scale_y_continuous(limits = c(85, 110), labels = function(x){paste0(x-100,"%")}, name = "deviation from mean")+
scale_color_manual(labels = c("Weather/Weekday Corrected","14 day MA of corrected"),
values = c(col4, col1))+
labs(title = "Recent Change in California Electric Demand",
caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO" )+
theme_minimal()+ ##CHANGE TO BOD THEME
theme(axis.title.y = element_text(), axis.title.x = element_blank())+
theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())
#Save
ggsave(fig_1, file = "./Figures/EL_Fig_1_Long_Demand_Series.png", width = 12, height = 8)
#Display
fig_1
#Adjust data for plot
fig_2_df <- daily_model_df %>%
select(date, MW, MW_corrected, MW_corrected_MA7)%>%
pivot_longer(-date)
#Plot with ggplot
fig_2 <- ggplot(data = fig_2_df)+
geom_vline(xintercept = ca_sip, col = "red")+
geom_line(aes(x=date,y=value/1000, col = name))+
labs(title = "California Electric Usage",
caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO",
y = "Gigawatts")+
scale_color_manual(labels = c("Raw Demand","Weather/Weekday Corrected","14 day MA of corrected"),
values = c(col3,col4,col1))+
theme_minimal()+ ##CHANGE TO BOD THEME
theme(axis.title.y = element_text(), axis.title.x = element_blank())+
theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())
#Save
ggsave(fig_2, file = "./Figures/EL_Fig_2_Long_Demand_Series.png", width = 12, height = 8)
#Display
fig_2
#Structure data for plot
fig_3_area_df <- daily_model_df %>%
select(date, MW_corrected)%>%
mutate(year = as.character(year(date)))%>%
filter(year %in% as.character(2014:2019))%>%
mutate(month = month(date),
day = day(date))%>%
mutate(date = as.Date(paste0("2020-",month,"-",day)))%>%
select(date, year, MW_corrected)%>%
group_by(date) %>%
arrange(year) %>%
summarize(min = min(MW_corrected), max = max(MW_corrected))%>%
mutate(min = rollapply(min,width = 14, fill = NA,FUN = mean,na.rm=TRUE))%>%
mutate(max = rollapply(max,width = 14, fill = NA,FUN = mean,na.rm=TRUE))
fig_3_line_df <- daily_model_df %>%
filter(year(date)==2020)
#Plot
fig_3 <- ggplot(data = fig_3_area_df)+
geom_vline(xintercept = ca_sip, col = "red")+
geom_ribbon(data = fig_3_area_df, aes(x=date,ymin = min/1000, ymax = max/1000, fill = "Range of previous 5 years"), col = NA)+
geom_line(data = fig_3_line_df, aes(x=date,y=MW_corrected_MA7/1000, color = "2020"), size = 1.25)+
labs(title = "CA ISO Electricity Usage: Comparing Recent Years",
y = "Gigawatts",
caption = "Note: Date of statewide shelter in place order shown in red.\nNote: Series are 14 day moving averages of weather/weekday corrected electric demand.\nSource: California ISO")+
scale_x_date(limits = c(as.Date("2020-01-07"), as.Date("2020-12-24")))+
theme_minimal()+
theme(axis.title.y = element_text(), axis.title.x = element_blank())+
theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())+
scale_color_manual(label = "2020", values = col1)+
scale_fill_manual(label = "Range of previous 5 years", values = col4)
#Save File
ggsave(fig_3, file = "./Figures/EL_Fig_3_Adjusted_Demand_Over_Recent_Years.png",
width = 12, height = 8)
#Display
fig_3
Perhaps the pandemic economy has different patterns of electric consumption beyond the overall dip. Let's look at electric usage throughout the day, by month.
fig_5_df <- electric_tac_hourly %>%
mutate(year = as.character(year(date)),
month = month(date, label = TRUE)) %>%
filter(year %in% c(2019,2020))%>%
group_by(year, month, OPR_HR, TAC_AREA_NAME) %>%
summarize(MW = mean(MW, na.rm = TRUE))%>%
group_by(year, month, OPR_HR) %>%
summarize(MW = sum(MW))
ggplot(data = fig_5_df, aes(x = OPR_HR, y = MW, col = year, group = year))+
geom_line()+
facet_wrap(~ month, ncol = 4)+
theme_minimal()