1 Introduction

This workbook scrapes, cleans, and analyzes electric demand data from the CA-ISO. Electrical consumption has recently received attention as a high-frequency alternative measure of macroeconomic activity. For example, nationwide energy consumption is one of several inputs to the Federal Reserve Bank of New York’s Weekly Economic Index. The goal of this analysis is to evaluate electrical consumption as a proxy for output, particularly in the context of the pandemic-induced recession. This script queries all necessary data, but will use local data where available.

Code is published at: [https://github.com/noahkouchekinia/CAISO](https://github.com/noahkouchekinia/CAISO)

2 Set Up

2.1 Set Up Environment

#Set working directory
  setwd("/home/noah/Documents/Code Samples/CAISO")

#Load Necessary Packages
  library(tidyverse)
  library(lubridate)
  library(ggplot2)
  library(tis)
  library(jsonlite)
  library(tigris)
  library(zoo)
  library(stargazer)

2.2 Define Useful Functions

#A function to compute the difference between UK and CA time on any given day.
#Later, an API call will require times in GMT+0, rather than local time.
#The offset is not a constant of 8, because the two zones have different daylight saving schedules.
  tz_offset <- function(date,tz_a = "America/Los_Angeles",tz_b = "Europe/London"){
     a_time <- date %>%
                as.character() %>%
                paste("12:00:00")%>%
                as.POSIXct(tz_a)%>%
                with_tz(tz = tz_b)%>%
                hour()
    b_time <- date %>%
                as.character() %>%
                paste("12:00:00")%>%
                as.POSIXct(tz_a)%>%
                hour()
    offset <- a_time-b_time
    return(offset)
  }
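For example (offsets assume the 2021 daylight saving calendars; for most of the year the two schedules align and the offset is 8):

  tz_offset(as.Date("2021-01-15")) # 8: both zones on standard time
  tz_offset(as.Date("2021-03-20")) # 7: US DST has started, UK summer time has not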

#Colors

col1 <- "#2e5e8b"
col2 <- "#8baf3e"
col3 <- "#fcc62d"
col4 <- "#88cde5"
col5 <- "#474747"

#Get BOD Theme
#   source("S:/Monroe/ERcharts/r_backend/function.R") 

3 Data Collection

3.1 Scrape Latest CA-ISO Data

Here, I query demand data from the California Independent System Operator (CA-ISO). Their OASIS API allows me to pull hour-by-hour demand. However, querying this API is challenging: documentation is limited, and the API caps the amount of data returned per request, requiring many repeated pulls.
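For reference, each iteration of the loop below assembles a query URL of the following form (dates and time offsets illustrative):

  http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_FCST&market_run_id=ACTUAL
    &resultformat=6&startdatetime=20200101T08:00-0000&enddatetime=20200131T08:00-0000&version=1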

#Set query parameters
  start_date     <- as.Date("2011-06-01") #Set to after PGE structural break
  end_date       <- Sys.Date()
  market_process <- "ACTUAL"              #Select from: 2DA, 7DA, DAM, ACTUAL, RTM

#Check for data downloaded in previous scrapes, adjust scraping start date accordingly
  extant_data <- read.csv("./Data/Actual Hourly Integrated Load by Region.csv")
  extant_data$X <- NULL
  extant_data$OPR_DT <- as.Date(extant_data$OPR_DT)
  
  n_start_date <- max(start_date, max(extant_data$OPR_DT-1, na.rm = TRUE))

#Prepare dates used to loop through portions of days (30 at a time)
  #Create Vector of start and end dates
    all_dates <- as.Date(n_start_date:end_date,origin="1970-01-01") #all dates desired
    start_dates <- c(n_start_date, all_dates[1:length(all_dates)%%30==0]) #start dates every 30 days
    end_dates   <- unique(c(start_dates[2:length(start_dates)],end_date)) #offset start dates to get end dates
    if(length(start_dates)==1){end_dates <- end_date} #needed in case of only one scraping period
  #Get correct dates to include tz offset 
    start_offset <- tz_offset(start_dates)
    end_offset <- tz_offset(end_dates)
  #Create list the length of start_dates, in order to hold query results  
    data_list <- vector("list",length(start_dates))

#Set other query parameters
  q_base <- "http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_FCST"
  q_process <-  paste0("market_run_id=",market_process)
  q_format <-  "resultformat=6"
  q_v <- "version=1"

#Pull one thirty day period at a time
  for(i in 1:length(start_dates)){
    #Set variable query components
      q_start_date <-  paste0("startdatetime=",format(start_dates[i], "%Y%m%d"),"T0",start_offset[i],":00-0000")
      q_end_date <-  paste0("enddatetime=",format(end_dates[i], "%Y%m%d"),"T0", end_offset[i],":00-0000")
    #Assemble query
      query <-paste(q_base, q_process, q_format, q_start_date, q_end_date, q_v, sep="&") #Query URL for the OASIS API
    #Download Query
      tf = tempfile() #create temporary destination file
      download.file(query,tf) #download file to tempfile
      fname = unzip(tf,list=T)$Name[1] #Unzip downloaded file
      data_list[[i]] <- read.csv(unz(tf, fname)) #Read delimited data from file
      Sys.sleep(5) #Pause, so as not to send overlapping queries
    }

#Clean and export choice cuts of data
  caiso_df <- data_list[unlist(lapply(data_list,ncol))==14] %>%
     bind_rows()%>%
     select(c("OPR_DT","OPR_HR","TAC_AREA_NAME","LABEL","POS","MW"))%>%
     mutate(OPR_DT = as.Date(OPR_DT),
            TAC_AREA_NAME = as.character(TAC_AREA_NAME))%>%
     bind_rows(extant_data)%>%
     group_by(OPR_DT,OPR_HR,TAC_AREA_NAME) %>%
     summarize(LABEL = last(LABEL), POS = last(POS),MW = last(MW)) %>%
     mutate(date = OPR_DT, hour_f = factor(OPR_HR))
## `summarise()` regrouping output by 'OPR_DT', 'OPR_HR' (override with `.groups` argument)
 #Save Data
  write.csv(caiso_df, "./Data/Actual Hourly Integrated Load by Region.csv", row.names = FALSE)
  rm(list = c("market_process", "extant_data","all_dates","end_dates","end_offset","fname","i","n_start_date",
              "q_base", "q_end_date", "q_format", "q_process", "q_start_date","q_v","query","start_dates",
              "start_offset", "tf"))

3.2 Pull Degree Day Data

As I will soon demonstrate, electrical demand is extremely volatile and depends on factors unrelated to economic output. Perhaps the largest contributor is weather: HVAC systems use a lot of energy. NOAA releases daily “degree day” data, a population-weighted measure of how much the temperature in a region deviates from room temperature. Utilities use these degree day series to model and predict electrical consumption. We will use them to correct our electric demand series for weather.
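NOAA’s convention uses a 65°F base temperature. As a minimal sketch of the concept (the published state series are additionally population weighted across stations):

  avg_temp <- c(50, 65, 80)          #hypothetical daily mean temperatures, deg F
  heating  <- pmax(65 - avg_temp, 0) #heating degree days: 15, 0, 0
  cooling  <- pmax(avg_temp - 65, 0) #cooling degree days:  0, 0, 15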

#Set Query Parameters
  url_start <- "https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/"
  years <- year(start_date):year(end_date)
  categories <-c("/StatesCONUS.Cooling","/StatesCONUS.Heating")
  extension <- ".txt"
  
#Create list to hold results
  heat_tables <- cool_tables <-vector("list", length=length(years))
  
#Pull data year by year and category by category (heating and cooling degree days) with loops
#Double looping is not great, but iterations are low here.
  i <- 1
  for(year in years){
    for(category in categories){
      api_call <- paste0(url_start,year,category,extension)
      table <- read.table(api_call, sep = "|", skip = 3, stringsAsFactors = FALSE)
      table <- t(table)
      colnames(table) <- table[1,]
      colnames(table)[1] <- "date"
      table <- table[-1,]
      table <- as.data.frame(table, stringsAsFactors = FALSE)
      if(grepl("Heat",category)) heat_tables[[i]] <- table
      if(grepl("Cool",category)) cool_tables[[i]] <- table
    }
    i = i+1
  }
  
#Combine and Clean results
  heat_data <- bind_rows(heat_tables)%>%
    select(date,CA)%>%
    rename(heat = CA)
  cool_data <- bind_rows(cool_tables)%>%
    select(date,CA)%>%
    rename(cool = CA)
  degree_days <- full_join(heat_data, cool_data)%>%
    mutate(date = as.Date(date,format = "%Y%m%d"),
           heat = as.numeric(heat),
           cool = as.numeric(cool))
## Joining, by = "date"
#Conclude 
  write.csv(degree_days, "./Data/degree_days.csv", row.names = FALSE)

3.3 Pull Solar Radiation Series

California has a sizable and increasing amount of rooftop solar. If residents use rooftop solar to offset their consumption of grid electricity, then the trend and cyclical components of rooftop solar generation need to be removed from our electricity demand series.

3.3.1 Daylight Cycle

Perhaps the crudest way to get at solar generation capacity is the number of daylight hours per day, which cycles predictably over the year.

#I need to pick a single point to query to represent California. Let's use the population centroid.
#Source: https://www2.census.gov/geo/docs/reference/cenpop2010/CenPop2010_Mean_ST.txt
  ca_pop_centroid <- c(+35.463595,-119.325359)

#Check for extant data, let's not query data we already have.
  extant_exist <- file.exists("./Data/daylight.csv")
  if(extant_exist){
    extant_data <- read.csv(file = "./Data/daylight.csv")
    extant_dates <- as.Date(extant_data$date)
    date_range <- as.Date(start_date:end_date, origin ="1970-01-01")
    query_dates <- date_range[!date_range %in% extant_dates]
  }
  if(!extant_exist){
    query_dates <- as.Date(start_date:end_date, origin ="1970-01-01")
  }
  
#Query API if necessary
  query_needed <- length(query_dates) > 0
  if(query_needed){
    #generate api calls
      api_calls <- paste0("https://api.sunrise-sunset.org/json?",
                          "lat=", ca_pop_centroid[1],
                          "&lng=",ca_pop_centroid[2],
                          "&date=",query_dates,
                          "&formatted=0")
    #Set vectors to store query results
      sunrise <- character(length=length(api_calls))
      sunset <- character(length=length(api_calls))
      day_length <- character(length=length(api_calls))
    
    #Loop through api queries
      for(i in 1:length(api_calls)){
        results <-fromJSON(url(api_calls[i]))[[1]]
        sunrise[i]    <-results$sunrise[1]
        sunset[i]     <-results$sunset[1]
        day_length[i] <-results$day_length[1]
      }
    #create data.frame
      daylight <- data.frame(date = query_dates, sunrise, sunset, day_length)
    #combine with old data, if necessary
      if(extant_exist){
        daylight <- rbind(daylight, extant_data)
      }
    #write for future use
        write.csv(daylight, file = "./Data/daylight.csv", row.names = FALSE)

  }
  if(!query_needed){
    daylight <- extant_data
  }
#Encode date column
  daylight$date <- as.Date(daylight$date)
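For reference, a single unformatted response from the sunrise-sunset API has roughly the shape below (values illustrative); the fields parsed above are sunrise, sunset, and day_length, the latter in seconds when formatted=0.

  # {"results":{"sunrise":"2020-06-20T12:42:42+00:00",
  #             "sunset":"2020-06-21T03:26:06+00:00",
  #             "day_length":52764, ...},
  #  "status":"OK"}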

3.3.2 Downward Solar Radiation Data

This portion is a work in progress.

#Get California Geographic boundaries
#I am ignoring islands; they are not in CA-ISO anyway.
  us_shape <- tigris::states()
  ca_geo <- as.data.frame(us_shape[us_shape$STUSPS == "CA","geometry"][[1]][[1]][[1]][[1]])
  ca_geo_query <- paste(paste(ca_geo$V1,ca_geo$V2, sep = "+"),collapse = "%")

#Set up other components of query
  api_key = 'NOlwzKodWvBC5ZmHixTB1elMbiOiEZ8fNKDocxac'
  wkt = paste0("POLYGON(",ca_geo_query,")")
  attributes = "dhi,dni,ghi"
  leap_year = 'true'
  interval = '60'
  utc = 'false'
  your_name = 'Noah+Kouchekinia'
  reason_for_use = 'electric+demand+correcting'
  your_affiliation = 'FRBSF'
  your_email = 'n.a.kouchekinia@sf.frb.org'
  mailing_list = 'false'
  
#Build Query
    url = paste0("https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv?",
                 "wkt=POLYGON(",ca_geo_query,")&",
                 "names=2020&",
                 "leap_day=",leap_year,"&",
                 "interval=",interval,"&",
                 "utc=",utc,"&",
                 "full_name=",your_name,"&",
                 "email=",your_email,"&",
                 "affiliation=",your_affiliation,"&",
                 "mailing_list=",mailing_list,"&",
                 "reason=",reason_for_use,"&",
                 "api_key=",api_key,"&",
                 "attributes=",attributes)
 radiation_json <- read_json(url)

3.3.3 Installed Solar Capacity

This portion is a work in progress.

#placeholder

3.4 Create Weekday/Holiday Series

Electrical usage also varies with work schedules. We need day-of-the-week and holiday dummies to account for this variation.

#Get Federal Holidays Using the tis package
  holidays <- year(start_date):year(end_date)%>%
    holidays()%>% 
    data.frame(date = ., holiday = names(.))%>%
    mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

#Get Easter using tis package
  easter <-  year(start_date):year(end_date)%>%
    easter()%>% 
    data.frame(date = ., holiday = "easter")%>%
    mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

#Get day of week effects
  day_of_week <- as.Date(start_date:end_date, origin = "1970-01-01") %>%
    as_tibble() %>%
    mutate(date = value, 
           day_of_week = wday(date, label = TRUE))%>%
    select(!value)
  
#Combine 
  day_dummies <- bind_rows(holidays,easter) %>%
    full_join(day_of_week)%>%
    mutate(holiday = replace(holiday, is.na(holiday),"none"),
           holiday = factor(holiday, levels = c("none",unique(holiday)[unique(holiday) != "none"])),
           day_of_week = factor(day_of_week))
## Joining, by = "date"
#Conclude  

4 Analysis

4.1 Correcting the Daily Series

4.1.1 Linear Model

Most standard seasonal adjustment routines are designed for monthly or quarterly data and do not work on daily data. Instead, I correct the daily series with a linear model that absorbs weather, holiday, day-of-week, and daylight effects.
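Concretely, the lm call below estimates, for TAC area $a$ on date $t$ (with $h(t)$ the holiday and $d(t)$ the day of week; a sketch of the specification):

$$MW_{a,t} = \alpha_a + \beta_a\,\mathrm{heat}_t + \gamma_a\,\mathrm{cool}_t + \delta_{a,h(t)} + \theta_{a,d(t)} + \lambda_a\,\mathrm{daylength}_t + \varepsilon_{a,t}$$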

#Extract TAC*Date panel from CAISO data
  electric_tac_hourly <- caiso_df %>%
    filter(!(TAC_AREA_NAME =="CA ISO-TAC") & grepl("-TAC",TAC_AREA_NAME)) %>% #Keep only TAC disaggregation level data
    mutate(TAC_AREA_NAME = replace(TAC_AREA_NAME,TAC_AREA_NAME == "MWD-TAC","SCE-TAC"))%>% #join recently split MWD back to SCE
    filter(!TAC_AREA_NAME == "VEA-TAC")%>% #Drop VEA, which is mostly in Nevada
    group_by(date, OPR_HR, TAC_AREA_NAME) %>% 
    summarize(MW = sum(MW))  #Add together MWD & SCE
## `summarise()` regrouping output by 'date', 'OPR_HR' (override with `.groups` argument)
  electric_tac_daily <- electric_tac_hourly %>%
    group_by(date, TAC_AREA_NAME) %>%
    summarize(MW = sum(MW), hours = n()) %>% #Add together hours for daily series
    filter(hours == 24) %>% #Check that days are complete
    select(!hours)
## `summarise()` regrouping output by 'date' (override with `.groups` argument)
#Assemble all data for model
  daily_model_df <- electric_tac_daily %>%
    left_join(degree_days) %>%
    left_join(day_dummies) %>%
    left_join(daylight) %>%
    filter(date >= as.Date(start_date) & date <= as.Date(end_date))
## Joining, by = "date"
## Joining, by = "date"
## Joining, by = "date"
#Model the daily series
#Here, we want to retain day-to-day variation, but correct for weather, holidays, and daylight
  daily_model <- lm(MW ~ TAC_AREA_NAME + 
                         TAC_AREA_NAME*heat+ 
                         TAC_AREA_NAME*cool+ 
                         TAC_AREA_NAME*holiday+ 
                         TAC_AREA_NAME*day_of_week+
                         TAC_AREA_NAME*as.numeric(day_length),
              data = daily_model_df, 
              na.action = na.exclude)

  stargazer(daily_model, type = "html")
Dependent variable: MW

| Term | Coefficient | Std. Error |
|:-----|------------:|-----------:|
| TAC_AREA_NAMESCE-TAC | 48,938.360*** | (3,038.912) |
| TAC_AREA_NAMESDGE-TAC | -160,638.800*** | (3,038.912) |
| heat | 315.366*** | (44.044) |
| cool | 6,357.870*** | (60.584) |
| holidayMLKing | 462.924 | (3,751.402) |
| holidayGWBirthday | -12,735.310*** | (3,748.219) |
| holidayMemorial | -15,173.600*** | (3,955.890) |
| holidayIndependence | -15,634.220*** | (4,157.497) |
| holidayLabor | -17,905.700*** | (3,757.980) |
| holidayColumbus | -5,244.748 | (3,755.142) |
| holidayVeterans | 330.730 | (3,921.827) |
| holidayThanksgiving | -27,857.300*** | (3,748.646) |
| holidayChristmas | -19,382.780*** | (3,726.673) |
| holidayNewYears | -18,656.420*** | (3,728.516) |
| holidayeaster | -17,639.120*** | (3,946.800) |
| day_of_week.L | 3,707.617*** | (527.653) |
| day_of_week.Q | -25,774.480*** | (524.275) |
| day_of_week.C | 2,628.248*** | (526.797) |
| day_of_week4 | -8,011.188*** | (528.989) |
| day_of_week5 | 893.204* | (525.747) |
| day_of_week6 | -819.718 | (521.279) |
| as.numeric(day_length) | 0.773*** | (0.046) |
| TAC_AREA_NAMESCE-TAC:heat | -809.506*** | (62.288) |
| TAC_AREA_NAMESDGE-TAC:heat | -379.974*** | (62.288) |
| TAC_AREA_NAMESCE-TAC:cool | 2,656.735*** | (85.679) |
| TAC_AREA_NAMESDGE-TAC:cool | -4,826.801*** | (85.679) |
| TAC_AREA_NAMESCE-TAC:holidayMLKing | -9,008.110* | (5,305.284) |
| TAC_AREA_NAMESDGE-TAC:holidayMLKing | -2,695.387 | (5,305.284) |
| TAC_AREA_NAMESCE-TAC:holidayGWBirthday | -3,567.764 | (5,300.782) |
| TAC_AREA_NAMESDGE-TAC:holidayGWBirthday | 9,969.429* | (5,300.782) |
| TAC_AREA_NAMESCE-TAC:holidayMemorial | -20,311.600*** | (5,594.473) |
| TAC_AREA_NAMESDGE-TAC:holidayMemorial | 10,558.490* | (5,594.473) |
| TAC_AREA_NAMESCE-TAC:holidayIndependence | -23,615.120*** | (5,879.588) |
| TAC_AREA_NAMESDGE-TAC:holidayIndependence | 7,886.702 | (5,879.588) |
| TAC_AREA_NAMESCE-TAC:holidayLabor | -11,581.610** | (5,314.587) |
| TAC_AREA_NAMESDGE-TAC:holidayLabor | 14,005.530*** | (5,314.587) |
| TAC_AREA_NAMESCE-TAC:holidayColumbus | 10,064.550* | (5,310.572) |
| TAC_AREA_NAMESDGE-TAC:holidayColumbus | 6,681.239 | (5,310.572) |
| TAC_AREA_NAMESCE-TAC:holidayVeterans | -5,382.590 | (5,546.300) |
| TAC_AREA_NAMESDGE-TAC:holidayVeterans | -2,461.125 | (5,546.300) |
| TAC_AREA_NAMESCE-TAC:holidayThanksgiving | -8,378.609 | (5,301.386) |
| TAC_AREA_NAMESDGE-TAC:holidayThanksgiving | 22,268.250*** | (5,301.386) |
| TAC_AREA_NAMESCE-TAC:holidayChristmas | -14,246.850*** | (5,270.311) |
| TAC_AREA_NAMESDGE-TAC:holidayChristmas | 14,171.950*** | (5,270.311) |
| TAC_AREA_NAMESCE-TAC:holidayNewYears | -8,825.066* | (5,272.919) |
| TAC_AREA_NAMESDGE-TAC:holidayNewYears | 14,656.740*** | (5,272.919) |
| TAC_AREA_NAMESCE-TAC:holidayeaster | 8,321.789 | (5,581.618) |
| TAC_AREA_NAMESDGE-TAC:holidayeaster | 16,379.150*** | (5,581.618) |
| TAC_AREA_NAMESCE-TAC:day_of_week.L | 1,645.484** | (746.213) |
| TAC_AREA_NAMESDGE-TAC:day_of_week.L | -3,285.199*** | (746.213) |
| TAC_AREA_NAMESCE-TAC:day_of_week.Q | -8,470.280*** | (741.437) |
| TAC_AREA_NAMESDGE-TAC:day_of_week.Q | 20,252.040*** | (741.437) |
| TAC_AREA_NAMESCE-TAC:day_of_week.C | 743.765 | (745.003) |
| TAC_AREA_NAMESDGE-TAC:day_of_week.C | -2,061.820*** | (745.003) |
| TAC_AREA_NAMESCE-TAC:day_of_week4 | -3,488.503*** | (748.103) |
| TAC_AREA_NAMESDGE-TAC:day_of_week4 | 5,980.592*** | (748.103) |
| TAC_AREA_NAMESCE-TAC:day_of_week5 | 601.044 | (743.519) |
| TAC_AREA_NAMESDGE-TAC:day_of_week5 | -521.052 | (743.519) |
| TAC_AREA_NAMESCE-TAC:day_of_week6 | -152.276 | (737.200) |
| TAC_AREA_NAMESDGE-TAC:day_of_week6 | 589.508 | (737.200) |
| TAC_AREA_NAMESCE-TAC:as.numeric(day_length) | -1.169*** | (0.065) |
| TAC_AREA_NAMESDGE-TAC:as.numeric(day_length) | -1.185*** | (0.065) |
| Constant | 230,821.100*** | (2,148.835) |

| Statistic | Value |
|:----------|------:|
| Observations | 10,647 |
| R2 | 0.989 |
| Adjusted R2 | 0.989 |
| Residual Std. Error | 11,708.080 (df = 10584) |
| F Statistic | 15,780.840*** (df = 62; 10584) |

Note: *p<0.1; **p<0.05; ***p<0.01
  daily_model_df <- daily_model_df %>%
    ungroup()%>%
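    #Correct by subtracting fitted weather/calendar effects from observed MW,
    #then re-centering the residuals on the mean prediction so units stay in MW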
    mutate(MW_predicted = predict(daily_model),
           MW_corrected = (MW - MW_predicted) + mean(MW_predicted, na.rm = TRUE))%>%
    group_by(date)%>%
    summarize(MW_predicted = sum(MW_predicted), MW = sum(MW), MW_corrected = sum(MW_corrected))
## `summarise()` ungrouping output (override with `.groups` argument)
  # plot(daily_model_df$date, daily_model_df$MW, type = "l")
  # lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
  # lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")

4.1.2 Additional Smoothing

There is some remaining, mostly random, day-to-day variation we are not so interested in. Let’s remove this with a 14-day moving average.

  #Compute a centered 14-day moving average (note: despite the _MA7 name, the window is 14 days, matching the figure labels)
  daily_model_df$MW_corrected_MA7 <- rollapply(daily_model_df$MW_corrected,
                                               width = 14,
                                               fill = NA,
                                               FUN = mean,
                                               na.rm = TRUE)

 # plot(daily_model_df$date, daily_model_df$MW, type = "l")
 #  lines(daily_model_df$date, daily_model_df$MW_predicted, type = "l", col = "red")
 #  lines(daily_model_df$date, daily_model_df$MW_corrected, type = "l", col = "blue")
 #  lines(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")
 #  
 #  plot(daily_model_df$date, daily_model_df$MW_corrected_MA7, type = "l", col = "green")

4.2 Plotting Recent (Conditional) Consumption

4.2.1 Recent Electric Consumption

#Modify Data for plot
  fig_1_df <- daily_model_df %>%
    select(date,MW_corrected,MW_corrected_MA7)%>%
    pivot_longer(-date)%>%
    group_by(name) %>% 
    mutate(mean = mean(value[year(date)==2019], na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(percent_of_mean = (value/mean)*100)

#Set Annotation Points
  ca_sip <- as.Date("2020-03-19")

#Plot Using GGplot
  fig_1 <- ggplot(data = fig_1_df)+
    geom_vline(xintercept = ca_sip, col = "red")+
    geom_line(aes(x=date,y=percent_of_mean, col = name), size = 1.25)+
    scale_x_date(limits = c(as.Date("2020-01-01"), end_date))+
    scale_y_continuous(limits = c(85, 110), labels = function(x){paste0(x-100,"%")}, name = "deviation from mean")+
    scale_color_manual(labels = c("Weather/Weekday Corrected","14 day MA of corrected"),
                         values = c(col4, col1))+
    labs(title = "Recent Change in California Electric Demand",
         caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO" )+
    theme_minimal()+ ##CHANGE TO BOD THEME
    theme(axis.title.y = element_text(), axis.title.x = element_blank())+
    theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())
#Save  
  ggsave(fig_1, file = "./Figures/EL_Fig_1_Long_Demand_Series.png", width = 12, height = 8)
## Warning: Removed 6246 row(s) containing missing values (geom_path).
#Display
  fig_1
## Warning: Removed 6246 row(s) containing missing values (geom_path).

4.2.2 Electric Series in Total

#Adjust data for plot
  fig_2_df <- daily_model_df %>%
    select(date, MW, MW_corrected, MW_corrected_MA7)%>%
    pivot_longer(-date)
#Plot with ggplot
  fig_2 <- ggplot(data = fig_2_df)+
    geom_vline(xintercept = ca_sip, col = "red")+
    geom_line(aes(x=date,y=value/1000, col = name))+
    labs(title = "California Electric Usage", 
         caption = "Note: Date of statewide shelter in place order shown in red.\nSource: California ISO",
         y = "Gigawatts")+
    scale_color_manual(labels = c("Raw Demand","Weather/Weekday Corrected","14 day MA of corrected"),
                       values = c(col3,col4,col1))+
    theme_minimal()+ ##CHANGE TO BOD THEME
    theme(axis.title.y = element_text(), axis.title.x = element_blank())+
    theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())

#Save
  ggsave(fig_2, file = "./Figures/EL_Fig_2_Long_Demand_Series.png", width = 12, height = 8)
## Warning: Removed 14 row(s) containing missing values (geom_path).
#Display
  fig_2
## Warning: Removed 14 row(s) containing missing values (geom_path).

4.2.3 Electrical Consumption in 2020 Compared to Recent Years

#Structure data for plot
fig_3_area_df <- daily_model_df %>%
  select(date, MW_corrected)%>%
  mutate(year  = as.character(year(date)))%>%
  filter(year %in% as.character(2014:2019))%>%
  mutate(month = month(date),
         day   = day(date))%>%
  mutate(date  = as.Date(paste0("2020-",month,"-",day)))%>%
  select(date, year, MW_corrected)%>%
  group_by(date) %>%
  arrange(year) %>%
  summarize(min = min(MW_corrected), max = max(MW_corrected))%>%
  mutate(min = rollapply(min,width = 14, fill = NA,FUN = mean,na.rm=TRUE))%>%
  mutate(max = rollapply(max,width = 14, fill = NA,FUN = mean,na.rm=TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
fig_3_line_df <- daily_model_df %>%
  filter(year(date)==2020)


#Plot
  fig_3 <- ggplot(data = fig_3_area_df)+
    geom_vline(xintercept = ca_sip, col = "red")+
    geom_ribbon(data = fig_3_area_df, aes(x=date,ymin = min/1000, ymax = max/1000, fill = "Range of previous 5 years"), col = NA)+
    geom_line(data = fig_3_line_df, aes(x=date,y=MW_corrected_MA7/1000, color = "2020"), size = 1.25)+
    labs(title = "CA ISO Electricity Usage: Comparing Recent Years",
         y = "Gigawatts",
         caption = "Note: Date of statewide shelter in place order shown in red.\nNote: Series are 14 day moving averages of weather/weekday corrected electric demand.\nSource: California ISO")+
    scale_x_date(limits = c(as.Date("2020-01-07"), as.Date("2020-12-24")))+
    theme_minimal()+
    theme(axis.title.y = element_text(), axis.title.x = element_blank())+
    theme(legend.position="top", legend.direction = "horizontal", legend.title = element_blank())+
    scale_color_manual(labels = "2020", values = col1)+
    scale_fill_manual(labels = "Range of previous 5 years", values = col4)

#Save File
  ggsave(fig_3, file = "./Figures/EL_Fig_3_Adjusted_Demand_Over_Recent_Years.png",
         width = 12, height = 8)
## Warning: Removed 13 row(s) containing missing values (geom_path).
#Display
  fig_3
## Warning: Removed 13 row(s) containing missing values (geom_path).

4.3 Predicting Using the Time Series

4.3.1 Linear Model

4.3.2 MIDAS Model

4.4 Time of Day Patterns

Perhaps the pandemic economy shows different patterns of electric consumption, beyond the overall dip in usage. Let’s look at electric usage throughout the day, by month.

fig_5_df <- electric_tac_hourly %>%
  mutate(year = as.character(year(date)),
         month = month(date, label = TRUE)) %>%
  filter(year %in% c(2019,2020))%>%
  group_by(year, month, OPR_HR, TAC_AREA_NAME) %>%
  summarize(MW = mean(MW, na.rm = TRUE))%>%
  group_by(year, month, OPR_HR) %>%
  summarize(MW = sum(MW))
## `summarise()` regrouping output by 'year', 'month', 'OPR_HR' (override with `.groups` argument)
## `summarise()` regrouping output by 'year', 'month' (override with `.groups` argument)
ggplot(data = fig_5_df, aes(x = OPR_HR, y = MW, col = year, group = year))+
  geom_line()+
  facet_wrap(~ month, ncol = 4)+
  theme_minimal()