Energy price convergence in 2021#
In this example, we will take a look at energy price convergence.
To do this, we’ll look at price forecasts from (30-minute) pre-dispatch (PREDISPATCH) and 5-minute pre-dispatch (P5MIN).
We’ll also look at the relationship between demand forecast error and energy price convergence.
Note
While we use price error to refer to the difference between actual prices and forecast prices in some parts of this example, we prefer price convergence. The intention of P5MIN
and PREDISPATCH
is to provide AEMO and participants with up-to-date system and market information. A potential outcome of this is that participants change their decisions (e.g. rebidding, as we note here). As such, prices reported in P5MIN
and PREDISPATCH
should not be strictly interpreted as forecasts.
Key imports#
# standard libraries
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Tuple
# NEM data libraries
# NEMOSIS for actual demand data
# NEMSEER for forecast demand data
import nemosis
from nemseer import compile_data, download_raw_data, generate_runtimes
# data wrangling libraries
import pandas as pd
import xarray as xr
# interactive plotting
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# static plotting
import matplotlib
import matplotlib.pyplot as plt
# silence NEMSEER and NEMOSIS logging
logging.getLogger("nemosis").setLevel(logging.WARNING)
logging.getLogger("nemseer").setLevel(logging.ERROR)
Plot Styling#
nemseer_template = dict(
layout=go.Layout(
font_family="Source Sans 3",
title_font_size=24,
title_x=0.05,
plot_bgcolor="#f0f0f0",
colorway=px.colors.qualitative.Bold,
)
)
plt.style.use(Path("styling", "matplotlib_styling.mplstyle"))
Defining our analysis start and end dates#
analysis_start = "2021/01/01 00:00:00"
analysis_end = "2022/01/01 00:00:00"
Getting Data#
In AEMO data tables, the energy price in $/MW/hr is usually found in the RRP
column.
We will focus on 5-minute dispatch interval prices.
Note
Prior to midnight 30 September (commencement of 5-minute settlement), market settlement prices for energy were calculated as the average of the 6 dispatch interval prices in a half hour window.
Obtaining actual price data from NEMOSIS
#
We will download DISPATCHPRICE
to access the RRP
(energy price) field and cache it so that it’s ready for computation.
nemosis_cache = Path("nemosis_cache/")
if not nemosis_cache.exists():
nemosis_cache.mkdir()
nemosis.cache_compiler(
analysis_start, analysis_end, "DISPATCHPRICE", nemosis_cache, fformat="parquet"
)
Obtaining actual demand data from NEMOSIS
#
We can download DISPATCHREGIONSUM
to get actual demand values used in dispatch (TOTALDEMAND
).
nemosis.cache_compiler(
analysis_start, analysis_end, "DISPATCHREGIONSUM", nemosis_cache, fformat="parquet"
)
Obtaining forecast price data from NEMSEER
#
We will download PRICE
to access the RRP
field in PREDISPATCH
forecasts, and REGIONSOLUTION
to access the RRP
field in P5MIN
forecasts. We’ll cache it so that it’s ready for computation.
download_raw_data(
"PREDISPATCH",
"PRICE",
"nemseer_cache/",
forecasted_start=analysis_start,
forecasted_end=analysis_end,
)
download_raw_data(
"P5MIN",
"REGIONSOLUTION",
"nemseer_cache/",
forecasted_start=analysis_start,
forecasted_end=analysis_end,
)
Obtaining forecast demand data from NEMSEER#
We will also download demand data. This is contained within REGIONSOLUTION
for P5MIN
, but we need to get the table REGIONSUM
for PREDISPATCH
.
download_raw_data(
"PREDISPATCH",
"REGIONSUM",
"nemseer_cache/",
forecasted_start=analysis_start,
forecasted_end=analysis_end,
)
Comparing price (and demand) convergence for a particular time#
The code below (toggle to unhide) allows us to plot forecast convergence for price and demand across PREDISPATCH
and P5MIN
.
Note that we adjust the nominal run time to the actual run time for both forecast types.
def plot_price_comparison(time: str, regionid: str) -> go.Figure:
"""
Creates a figure that compares price forecasts from PD & P5MIN against
the actual price.
Args:
time: Datetime string in format YYYY/mm/dd HH:MM:SS
regionid: One of ("NSW1", "QLD1", "VIC1", "TAS1", "SA1")
Returns:
plotly Figure
"""
def get_actual_data(time: str) -> pd.DataFrame:
"""
Gets actual price data
"""
# get actual data from the hour beforehand to the interval of interest
nemosis_window = (
(
datetime.strptime(time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=60)
).strftime("%Y/%m/%d %H:%M:%S"),
time,
)
nemosis_price = nemosis.dynamic_data_compiler(
nemosis_window[0],
nemosis_window[1],
"DISPATCHPRICE",
nemosis_cache,
filter_cols=["INTERVENTION"],
filter_values=([0],),
)
actual_price = nemosis_price[["SETTLEMENTDATE", "REGIONID", "RRP"]]
return actual_price
def get_forecast_data(time: str) -> Tuple[xr.DataArray, xr.DataArray]:
"""
Gets P5 and PD forecast price data
Also corrects nominal to actual run time
"""
# get P5 and PD forecast data
## get PD data
run_start, run_end = generate_runtimes(time, time, "PREDISPATCH")
pd_price = compile_data(
run_start,
run_end,
time,
time,
"PREDISPATCH",
"PRICE",
"nemseer_cache/",
data_format="xr",
)["PRICE"]["RRP"]
## calculate actual run time from run time, then swap out nominal for actual
pd_price = pd_price.assign_coords(
{"actual_run_time": pd_price.coords["run_time"] - pd.Timedelta(30, "T")}
)
pd_price = pd_price.swap_dims({"run_time": "actual_run_time"}).drop("run_time")
## get P5 data
p5_price = compile_data(
run_start,
run_end,
time,
time,
"P5MIN",
"REGIONSOLUTION",
"nemseer_cache/",
data_format="xr",
)["REGIONSOLUTION"]["RRP"]
## calculate actual run time from run time, then swap out nominal for actual
p5_price = p5_price.assign_coords(
{"actual_run_time": p5_price.coords["run_time"] - pd.Timedelta(5, "T")}
)
p5_price = p5_price.swap_dims({"run_time": "actual_run_time"}).drop("run_time")
return pd_price, p5_price
actual_price = get_actual_data(time).query("REGIONID==@regionid")
pd_price, p5_price = get_forecast_data(time)
pd_price = pd_price.sel(REGIONID=regionid)
p5_price = p5_price.sel(REGIONID=regionid)
# create plotly figure
fig = go.Figure(
layout=dict(
title=f"Energy Price Comparison: {regionid} {time} <br><sup>Actual run times for forecasts</sup>",
template=nemseer_template
),
)
fig.add_traces(
[
go.Scatter(
x=pd_price.isel(forecasted_time=0).to_dataframe().index,
y=pd_price.isel(forecasted_time=0).to_dataframe()["RRP"],
name="PD",
mode="lines",
),
go.Scatter(
x=p5_price.isel(forecasted_time=0).to_dataframe().index,
y=p5_price.isel(forecasted_time=0).to_dataframe()["RRP"],
name="P5",
mode="lines",
),
go.Scatter(
x=actual_price["SETTLEMENTDATE"],
y=actual_price["RRP"],
name="Actual",
mode="lines",
),
]
)
return fig
def plot_demand_comparison(time: str, regionid: str) -> go.Figure:
"""
Creates a figure that compares demand forecasts from PD & P5MIN against
the actual demand used in dispatch.
Args:
time: Datetime string in format YYYY/mm/dd HH:MM:SS
regionid: One of ("NSW1", "QLD1", "VIC1", "TAS1", "SA1")
Returns:
plotly Figure
"""
def get_actual_data(time: str) -> pd.DataFrame:
"""
Gets actual demand data
"""
# get actual data from the hour beforehand to the interval of interest
nemosis_window = (
(
datetime.strptime(time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=60)
).strftime("%Y/%m/%d %H:%M:%S"),
time,
)
nemosis_demand = nemosis.dynamic_data_compiler(
nemosis_window[0],
nemosis_window[1],
"DISPATCHREGIONSUM",
nemosis_cache,
filter_cols=["INTERVENTION"],
filter_values=([0],),
)
actual_demand = nemosis_demand[["SETTLEMENTDATE", "REGIONID", "TOTALDEMAND"]]
return actual_demand.sort_values("SETTLEMENTDATE")
def get_forecast_data(time: str) -> Tuple[xr.DataArray, xr.DataArray]:
"""
Gets P5 and PD forecast demand data
Also corrects nominal to actual run time
"""
# get P5 and PD forecast data
## get PD data
run_start, run_end = generate_runtimes(time, time, "PREDISPATCH")
pd_demand = compile_data(
run_start,
run_end,
time,
time,
"PREDISPATCH",
"REGIONSUM",
"nemseer_cache/",
data_format="xr",
)["REGIONSUM"]["TOTALDEMAND"]
## calculate actual run time from run time, then swap out nominal for actual
pd_demand = pd_demand.assign_coords(
{"actual_run_time": pd_demand.coords["run_time"] - pd.Timedelta(30, "T")}
)
pd_demand = pd_demand.swap_dims({"run_time": "actual_run_time"}).drop(
"run_time"
)
## get P5 data
p5_demand = compile_data(
run_start,
run_end,
time,
time,
"P5MIN",
"REGIONSOLUTION",
"nemseer_cache/",
data_format="xr",
)["REGIONSOLUTION"]["TOTALDEMAND"]
## calculate actual run time from run time, then swap out nominal for actual
p5_demand = p5_demand.assign_coords(
{"actual_run_time": p5_demand.coords["run_time"] - pd.Timedelta(5, "T")}
)
p5_demand = p5_demand.swap_dims({"run_time": "actual_run_time"}).drop(
"run_time"
)
return pd_demand, p5_demand
actual_demand = get_actual_data(time).query("REGIONID==@regionid")
pd_demand, p5_demand = get_forecast_data(time)
pd_demand = pd_demand.sel(REGIONID=regionid)
p5_demand = p5_demand.sel(REGIONID=regionid)
# create plotly figure
fig = go.Figure(
layout=dict(
title=f"Demand Comparison: {regionid} {time} <br><sup>Actual run times for forecasts</sup>",
template=nemseer_template
),
)
fig.add_traces(
[
go.Scatter(
x=pd_demand.isel(forecasted_time=0).to_dataframe().index,
y=pd_demand.isel(forecasted_time=0).to_dataframe()["TOTALDEMAND"],
name="PD",
mode="lines",
),
go.Scatter(
x=p5_demand.isel(forecasted_time=0).to_dataframe().index,
y=p5_demand.isel(forecasted_time=0).to_dataframe()["TOTALDEMAND"],
name="P5",
mode="lines",
),
go.Scatter(
x=actual_demand["SETTLEMENTDATE"],
y=actual_demand["TOTALDEMAND"],
mode="lines",
name="Actual",
),
]
)
return fig
Example: Plotting price and demand convergence on a summer’s evening#
Let’s plot an example - a summer’s evening ramp event
time = "2021/12/30 18:00:00"
price_comp = plot_price_comparison(time, "NSW1")
demand_comp = plot_demand_comparison(time, "NSW1")
Looking at price convergence over the year#
To try and look at convergence a bit more systematically, we’ll compute the “price error” across 2021.
The code below obtains P5MIN
and PREDISPATCH
price forecasts, removes overlapping forecasted periods and calculates a “price error”.
The last two
PREDISPATCH
forecasts overlap withP5MIN
These are removed from
PREDISPATCH
def calculate_price_error(analysis_start: str, analysis_end: str) -> pd.DataFrame:
"""
Calculates price error in PREDISPATCH and P5MIN forecasts for periods between
analysis_start and analysis_end.
Args:
analysis_start: Start datetime, YYYY/mm/dd HH:MM:SS
analysis_end: End datetime, YYYY/mm/dd HH:MM:SS
Returns:
DataFrame with computed price error mapped to the ahead time of the
forecast and the forecasted time.
"""
def get_actual_price_data() -> pd.DataFrame:
"""
Gets actual price data
"""
# get actual demand data for forecasted_time
# nemosis start time must precede end of interval of interest by 5 minutes
nemosis_window = (
(
datetime.strptime(analysis_start, "%Y/%m/%d %H:%M:%S")
- timedelta(minutes=5)
).strftime("%Y/%m/%d %H:%M:%S"),
analysis_end,
)
nemosis_price = nemosis.dynamic_data_compiler(
nemosis_window[0],
nemosis_window[1],
"DISPATCHPRICE",
nemosis_cache,
filter_cols=["INTERVENTION"],
filter_values=([0],),
)
actual_price = nemosis_price[["SETTLEMENTDATE", "REGIONID", "RRP"]]
actual_price = actual_price.rename(
columns={"SETTLEMENTDATE": "forecasted_time"}
)
return actual_price
def get_forecast_price_data(ftype: str) -> pd.DataFrame:
"""
Get price forecast data for the analysis period given a particular forecast type
Args:
ftype: 'P5MIN' or 'PREDISPATCH'
Returns:
DataFrame with price forecast data
"""
# ftype mappings
table = {"PREDISPATCH": "PRICE", "P5MIN": "REGIONSOLUTION"}
run_col = {"PREDISPATCH": "PREDISPATCH_RUN_DATETIME", "P5MIN": "RUN_DATETIME"}
forecasted_col = {"PREDISPATCH": "DATETIME", "P5MIN": "INTERVAL_DATETIME"}
# get run times
forecasts_run_start, forecasts_run_end = generate_runtimes(
analysis_start, analysis_end, ftype
)
df = compile_data(
forecasts_run_start,
forecasts_run_end,
analysis_start,
analysis_end,
ftype,
table[ftype],
"nemseer_cache/",
)[table[ftype]]
# remove intervention periods
df = df.query("INTERVENTION == 0")
# rename run and forecasted time cols
df = df.rename(
columns={
run_col[ftype]: "run_time",
forecasted_col[ftype]: "forecasted_time",
}
)
# ensure values are sorted by forecasted and run times for nth groupby operation
return df[["run_time", "forecasted_time", "REGIONID", "RRP"]].sort_values(
["forecasted_time", "run_time"]
)
def combine_pd_p5_forecasts(
p5_df: pd.DataFrame, pd_df: pd.DataFrame
) -> pd.DataFrame:
"""
Combines P5 and PD forecasts, including removing PD overlap with P5
"""
# remove PD overlap with P5MIN
pd_nooverlap = pd_df.groupby(
["forecasted_time", "REGIONID"], as_index=False
).nth(slice(None, -2))
# concatenate and rename RRP to reflect that these are forecasted values
forecast_prices = pd.concat([pd_nooverlap, p5_df], axis=0).sort_values(
["forecasted_time", "actual_run_time"]
)
forecast_prices = forecast_prices.rename(columns={"RRP": "FORECASTED_RRP"})
return forecast_prices
def process_price_error(
forecast_prices: pd.DataFrame, actual_price: pd.DataFrame
) -> pd.DataFrame:
"""
Merges actual and forecast prices and calculates ahead time and price error
"""
# left merge to ensure each forecasted price is mapped to its corresponding actual price
all_prices = pd.merge(
forecast_prices,
actual_price,
how="left",
on=["forecasted_time", "REGIONID"],
)
all_prices["ahead_time"] = (
all_prices["forecasted_time"] - all_prices["actual_run_time"]
)
all_prices["error"] = all_prices["RRP"] - all_prices["FORECASTED_RRP"]
price_error = all_prices.drop(
columns=["RRP", "FORECASTED_RRP", "actual_run_time"]
)
return price_error
p5_df = get_forecast_price_data("P5MIN")
pd_df = get_forecast_price_data("PREDISPATCH")
# calulate actual run time for each forecast type
p5_df["actual_run_time"] = p5_df["run_time"] - pd.Timedelta(minutes=5)
pd_df["actual_run_time"] = pd_df["run_time"] - pd.Timedelta(minutes=30)
p5_df = p5_df.drop(columns="run_time")
pd_df = pd_df.drop(columns="run_time")
# get forecast prices
forecast_prices = combine_pd_p5_forecasts(p5_df, pd_df)
# actual prices
actual_price = get_actual_price_data()
price_error = process_price_error(forecast_prices, actual_price)
return price_error
price_error = calculate_price_error(analysis_start, analysis_end)
Absolute price error for each region, by dispatch interval and ahead time#
Below we plot absolute price error (absolute value of price error) for each region across the year for forecasts:
5 minutes ahead
1 hour ahead
5 hours ahead
24 hours ahead
Since forecasts 1+ hour ahead are only produced by PREDISPATCH
, and because PREDISPATCH
is only run every half hour, we will only plot price errors for intervals that end on the half hour.
Extract ahead times of interest for each region#
regions = ("QLD1", "NSW1", "VIC1", "TAS1", "SA1")
aheads = [dict(minutes=5), dict(hours=1), dict(hours=5), dict(days=1)]
region_price_errors = {}
for region in regions:
region_df = price_error.query("REGIONID==@region").set_index("forecasted_time")
# only keep select ahead times
ahead_df = region_df[region_df["ahead_time"].isin(aheads)]
ahead_errors = []
# manual pivot to bring data for each ahead time into columns
for ahead in reversed(aheads):
time_unit = list(ahead.keys())[0]
ahead_time = list(ahead.values())[0]
desc = f"{ahead_time} {time_unit} ahead"
ahead_errors.append(
region_df[region_df["ahead_time"] == timedelta(**ahead)]["error"].rename(
desc
)
)
# create a new DataFrame from ahead time series
# drop any rows with NAs. These will be intervals not on the half hour
ahead_df = pd.concat(ahead_errors, axis=1).dropna(axis=0)
region_price_errors[region] = ahead_df.abs()
abs_price_error_figs = []
for region in regions:
fig = px.line(
region_price_errors[region],
title=(
f"{region} Absolute Price Error "
+ "<br><sup>Half-hourly dispatch intervals only</sup>"
),
template=nemseer_template
)
fig.update_layout(yaxis_title="Price Error ($/MW/h)", xaxis_title="Forecasted Time")
abs_price_error_figs.append(fig)
Zoom in and see how price error changes across forecast ahead times when:
Price errors are large
Price errors are relatively small