"""Probablistic forecast error metrics."""
import numpy as np
def brier_score(obs, fx, fx_prob):
    """Brier Score (BS).

        BS = 1/n sum_{i=1}^n (f_i - o_i)^2

    where n is the number of forecasts, f_i is the forecasted probability of
    event i, and o_i is the observed event indicator (o_i=0: event did not
    occur, o_i=1: event occurred). The forecasts are supplied as the
    right-hand-side of a CDF interval, e.g., forecast <= 10 MW at time i, and
    therefore o_i is defined as:

        o_i = 1 if obs_i <= fx_i, else o_i = 0

    where fx_i and obs_i are the forecast and observation at time i,
    respectively.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    bs : float
        The Brier Score [unitless], bounded between 0 and 1, where values
        closer to 0 indicate better forecast performance and values closer to 1
        indicate worse performance.

    Notes
    -----
    The Brier Score implemented in this function is for binary outcomes only,
    rather than the more general (but less commonly used) categorical version.

    Inputs are converted with ``np.asarray`` so that plain Python sequences
    are compared element-wise rather than lexicographically.

    """
    obs = np.asarray(obs)
    fx = np.asarray(fx)

    # event: 0=did not happen, 1=did happen
    o = np.where(obs <= fx, 1.0, 0.0)

    # forecast probabilities [unitless]
    f = np.asarray(fx_prob) / 100.0

    bs = np.mean((f - o) ** 2)
    return bs
def brier_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """Brier Skill Score (BSS).

        BSS = 1 - BS_fx / BS_ref

    where BS_fx is the Brier Score of the evaluated forecast and BS_ref is the
    Brier Score of a reference forecast.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.
    ref : (n,) array_like
        Reference forecast (physical units) of the right-hand-side of a CDF
        interval.
    ref_prob : (n,) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The Brier Skill Score [unitless].

    Notes
    -----
    Degenerate reference scores are handled the same way as in
    ``quantile_skill_score`` and ``crps_skill_score``: the function returns 0
    if BS_fx and BS_ref are equal (including both being 0) and negative
    infinity if only BS_ref is 0.

    """
    bs_fx = brier_score(obs, fx, fx_prob)
    bs_ref = brier_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if bs_fx == bs_ref:
        return 0.0
    elif bs_ref == 0.0:
        # avoid divide by 0 (perfect reference forecast)
        return -np.inf
    else:
        return 1.0 - bs_fx / bs_ref
def quantile_score(obs, fx, fx_prob):
    """Quantile Score (QS).

    .. math::

        \\text{QS} = \\frac{1}{n} \\sum_{i=1}^n (fx_i - obs_i) * (p - 1\\{obs_i > fx_i\\})

    where :math:`n` is the number of forecasts, :math:`obs_i` is an
    observation, :math:`fx_i` is a forecast, :math:`1\\{obs_i > fx_i\\}` is an
    indicator function (1 if :math:`obs_i > fx_i`, 0 otherwise) and :math:`p`
    is the probability that :math:`obs_i <= fx_i`. [1]_ [2]_

    If :math:`obs > fx`, then we have:

    .. math::

        (fx - obs) < 0 \\\\
        (p - 1\\{obs > fx\\}) = (p - 1) <= 0 \\\\
        (fx - obs) * (p - 1) >= 0

    If instead :math:`obs < fx`, then we have:

    .. math::

        (fx - obs) > 0 \\\\
        (p - 1\\{obs > fx\\}) = (p - 0) >= 0 \\\\
        (fx - obs) * p >= 0

    Therefore, the quantile score is non-negative regardless of the obs and fx.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    qs : float
        The Quantile Score, with the same units as the observations.

    Notes
    -----
    Quantile score is meant to be computed for a single probability of
    :math:`n` samples.

    Inputs are converted with ``np.asarray`` so that plain Python sequences
    and scalars are both accepted.

    Examples
    --------
    >>> obs = 100     # observation [MW]
    >>> fx = 80       # forecast [MW]
    >>> fx_prob = 60  # probability [%]
    >>> quantile_score(obs, fx, fx_prob)   # score [MW]
    8.0

    References
    ----------
    .. [1] Koenker and Bassett, Jr. (1978) "Regression Quantiles", Econometrica
       46 (1), pp. 33-50. doi: 10.2307/1913643
    .. [2] Wilks (2020) "Forecast Verification". In "Statistical Methods in the
       Atmospheric Sciences" (3rd edition). Academic Press. ISBN: 9780123850225

    """  # NOQA: E501,W605
    obs = np.asarray(obs)
    fx = np.asarray(fx)

    # Prob(obs <= fx) = p
    p = np.asarray(fx_prob) / 100.0

    # indicator: 1 where the observation exceeds the forecast, else 0
    exceeded = np.where(obs > fx, 1.0, 0.0)

    qs = np.mean((fx - obs) * (p - exceeded))
    return qs
def quantile_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """Quantile Skill Score (QSS).

    .. math::

        \\text{QSS} = 1 - \\text{QS}_{\\text{fx}} / \\text{QS}_{\\text{ref}}

    where :math:`\\text{QS}_{\\text{fx}}` is the Quantile Score of the
    evaluated forecast and :math:`\\text{QS}_{\\text{ref}}` is the Quantile
    Score of a reference forecast. [1]_

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.
    ref : (n,) array_like
        Reference forecast (physical units) of the right-hand-side of a CDF
        interval.
    ref_prob : (n,) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The Quantile Skill Score [unitless].

    See Also
    --------
    :py:func:`solarforecastarbiter.metrics.probabilistic.quantile_score`

    Notes
    -----
    This function returns 0 if QS_fx and QS_ref are both 0, and negative
    infinity if only QS_ref is 0.

    References
    ----------
    .. [1] Bouallegue, Pinson and Friederichs (2015) "Quantile forecast
       discrimination ability and value", Quarterly Journal of the Royal
       Meteorological Society 141, pp. 3415-3424. doi: 10.1002/qj.2624

    """
    qs_fx = quantile_score(obs, fx, fx_prob)
    qs_ref = quantile_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if qs_fx == qs_ref:
        return 0.0
    elif qs_ref == 0.0:
        # avoid divide by 0
        # typically caused by deadbands and short time periods
        # (np.NINF was removed in NumPy 2.0, so spell it as -np.inf)
        return -np.inf
    else:
        return 1.0 - qs_fx / qs_ref
def _unique_forecasts(f):
    """Convert forecast probabilities to a set of unique values.

    Determine a set of unique forecast probabilities, based on input forecast
    probabilities of arbitrary precision, and approximate the input
    probabilities to lie within the set of unique values.

    Parameters
    ----------
    f : (n,) array_like
        Probability [unitless] associated with the forecasts. A scalar is
        accepted and treated as a single sample.

    Returns
    -------
    f_uniq : (n,) array_like
        The converted forecast probabilities [unitless].

    Notes
    -----
    This implementation determines the set of unique forecast probabilities by
    rounding the input probabilities to a precision determined by the number of
    input probability values: if less than 1000 samples, bin by tenths;
    otherwise bin by hundredths.

    Examples
    --------
    >>> f = np.array([0.1234, 0.156891, 0.10561])
    >>> _unique_forecasts(f)
    array([0.1, 0.2, 0.1])

    """
    f = np.asarray(f)

    # len() is undefined for 0-d input; a scalar counts as one sample
    n_samples = len(f) if f.ndim > 0 else 1

    if n_samples >= 1000:
        n_decimals = 2  # bin by hundredths (0.01, 0.02, etc.)
    else:
        n_decimals = 1  # bin by tenths (0.1, 0.2, etc.)

    f_uniq = np.around(f, decimals=n_decimals)
    return f_uniq
def brier_decomposition(obs, fx, fx_prob):
    """The 3-component decomposition of the Brier Score.

        BS = REL - RES + UNC

    where REL is the reliability, RES is the resolution and UNC is the
    uncertainty.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts. A scalar is broadcast
        against the observed events.

    Returns
    -------
    rel : float
        The reliability of the forecast [unitless], where a perfectly reliable
        forecast has value of 0.
    res : float
        The resolution of the forecast [unitless], where higher values are
        better.
    unc : float
        The uncertainty [unitless], where lower values indicate the event being
        forecasted occurs rarely.

    Notes
    -----
    The current implementation iterates over the unique forecasts to compute
    the reliability and resolution, rather than using a vectorized formulation.
    While a vectorized formulation may be more computationally efficient, the
    clarity of the iterative version outweighs the efficiency gains from the
    vectorized version. Additionally, the number of unique forecasts is kept
    small because the probabilities are binned to tenths or hundredths by
    ``_unique_forecasts``, so there is likely no practical difference in
    computation time between the iterative and vectorized versions.

    """
    obs = np.asarray(obs)
    fx = np.asarray(fx)

    # event: 0=did not happen, 1=did happen
    o = np.atleast_1d(np.where(obs <= fx, 1.0, 0.0))

    # forecast probabilities [unitless], aligned element-wise with the events
    f = np.asarray(fx_prob) / 100.0
    o, f = np.broadcast_arrays(o, f)

    # get unique forecast probabilities by binning
    f = _unique_forecasts(f)
    n = len(f)

    # base rate of the event over all samples
    o_avg = np.mean(o)

    # reliability and resolution
    rel, res = 0.0, 0.0
    for f_i, n_i in zip(*np.unique(f, return_counts=True)):
        o_i = np.mean(o[f == f_i])  # mean event value per set
        rel += n_i * (f_i - o_i) ** 2
        res += n_i * (o_i - o_avg) ** 2
    rel /= n
    res /= n

    # uncertainty
    unc = o_avg * (1.0 - o_avg)

    return rel, res, unc
def reliability(obs, fx, fx_prob):
    """Reliability (REL) of the forecast.

        REL = 1/n sum_{i=1}^I N_i (f_i - o_{i,avg})^2

    where n is the total number of forecasts, I is the number of unique
    forecasts (f_1, f_2, ..., f_I), N_i is the number of times each unique
    forecast occurs, o_{i,avg} is the average of the observed events during
    which the forecast was f_i.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    rel : float
        The reliability of the forecast [unitless], where a perfectly reliable
        forecast has value of 0.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """
    # reliability is the first of the three decomposition components
    components = brier_decomposition(obs, fx, fx_prob)
    return components[0]
def resolution(obs, fx, fx_prob):
    """Resolution (RES) of the forecast.

        RES = 1/n sum_{i=1}^I N_i (o_{i,avg} - o_{avg})^2

    where n is the total number of forecasts, I is the number of unique
    forecasts (f_1, f_2, ..., f_I), N_i is the number of times each unique
    forecast occurs, o_{i,avg} is the average of the observed events during
    which the forecast was f_i, and o_{avg} is the average of all observed
    events.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    res : float
        The resolution of the forecast [unitless], where higher values are
        better.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """
    # resolution is the second of the three decomposition components
    components = brier_decomposition(obs, fx, fx_prob)
    return components[1]
def uncertainty(obs, fx, fx_prob):
    """Uncertainty (UNC) of the forecast.

        UNC = base_rate * (1 - base_rate)

    where base_rate = 1/n sum_{i=1}^n o_i, and o_i is the observed event.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    unc : float
        The uncertainty [unitless], where lower values indicate the event being
        forecasted occurs rarely.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """
    # uncertainty is the third of the three decomposition components
    components = brier_decomposition(obs, fx, fx_prob)
    return components[2]
def sharpness(fx_lower, fx_upper):
    """Sharpness (SH).

        SH = 1/n sum_{i=1}^n (f_{u,i} - f_{l,i})

    where n is the total number of forecasts, f_{u,i} is the upper prediction
    interval value and f_{l,i} is the lower prediction interval value for
    sample i.

    Parameters
    ----------
    fx_lower : (n,) array_like
        The lower prediction interval values (physical units).
    fx_upper : (n,) array_like
        The upper prediction interval values (physical units).

    Returns
    -------
    sh : float
        The sharpness (physical units), where smaller sharpness values indicate
        "tighter" prediction intervals.

    Notes
    -----
    Inputs are converted with ``np.asarray`` so that plain Python sequences
    are subtracted element-wise.

    """
    sh = np.mean(np.asarray(fx_upper) - np.asarray(fx_lower))
    return sh
def continuous_ranked_probability_score(obs, fx, fx_prob):
    """Continuous Ranked Probability Score (CRPS).

    .. math::

        \\text{CRPS} = \\frac{1}{n} \\sum_{i=1}^n \\int_{-\\infty}^{\\infty}
        (F_i(x) - \\mathbf{1} \\{x \\geq y_i \\})^2 dx

    where :math:`F_i(x)` is the CDF of the forecast at time :math:`i`,
    :math:`y_i` is the observation at time :math:`i`, and :math:`\\mathbf{1}`
    is the indicator function that transforms the observation into a step
    function (1 if :math:`x \\geq y`, 0 if :math:`x < y`). In other words, the
    CRPS measures the difference between the forecast CDF and the empirical CDF
    of the observation. The CRPS has the same units as the observation. Lower
    CRPS values indicate more accurate forecasts, where a CRPS of 0 indicates a
    perfect forecast. [1]_ [2]_ [3]_

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n, d) array_like
        Forecasts (physical units) of the right-hand-side of a CDF with d
        intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is interpreted as
        <= 10 MW, <= 20 MW, <= 30 MW.
    fx_prob : (n, d) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    crps : float
        The Continuous Ranked Probability Score, with the same units as the
        observation.

    Raises
    ------
    ValueError
        If the forecasts have incorrect dimensions; either a) the forecasts are
        for a single sample (n=1) with d CDF intervals but are given as a 1D
        array with d values or b) the forecasts are given as 2D arrays (n,d)
        but do not contain at least 2 CDF intervals (i.e. d < 2).

    Notes
    -----
    The CRPS can be calculated analytically when the forecast CDF is of a
    continuous parametric distribution, e.g., Gaussian distribution. However,
    since the Solar Forecast Arbiter makes no assumptions regarding how a
    probabilistic forecast was generated, the CRPS is instead calculated using
    numerical integration of the discretized forecast CDF. Therefore, the
    accuracy of the CRPS calculation is limited by the precision of the
    forecast CDF. In practice, this means the forecast CDF should 1) consist of
    at least 10 intervals and 2) cover probabilities from 0% to 100%.

    References
    ----------
    .. [1] Matheson and Winkler (1976) "Scoring rules for continuous
           probability distributions." Management Science, vol. 22, pp.
           1087-1096. doi: 10.1287/mnsc.22.10.1087
    .. [2] Hersbach (2000) "Decomposition of the continuous ranked probability
           score for ensemble prediction systems." Weather Forecast, vol. 15,
           pp. 559-570. doi: 10.1175/1520-0434(2000)015<0559:DOTCRP>2.0.CO;2
    .. [3] Wilks (2019) "Statistical Methods in the Atmospheric Sciences", 4th
           ed. Oxford; Waltham, MA; Academic Press.

    """
    # accept any array_like (e.g. nested lists), since the code below relies
    # on ndarray indexing
    obs = np.asarray(obs)
    fx = np.asarray(fx)
    fx_prob = np.asarray(fx_prob)

    # validate the forecast shape: must be (n, d) with d >= 2
    if fx.ndim < 2:
        raise ValueError("forecasts must be 2D arrays (expected (n,d), got "
                         f"{fx.shape})")
    elif fx.shape[1] < 2:
        raise ValueError("forecasts must have d >= 2 CDF intervals "
                         f"(expected >= 2, got {fx.shape[1]})")

    n = fx.shape[0]

    # extend CDF min to ensure obs within forecast support
    # fx.shape = (n, d) ==> (n, d + 1)
    fx_min = np.minimum(obs, fx[:, 0])
    fx = np.hstack([fx_min[:, np.newaxis], fx])
    fx_prob = np.hstack([np.zeros([n, 1]), fx_prob])

    # extend CDF max to ensure obs within forecast support
    # fx.shape = (n, d + 1) ==> (n, d + 2)
    idx = (fx[:, -1] < obs)
    fx_max = np.maximum(obs, fx[:, -1])
    fx = np.hstack([fx, fx_max[:, np.newaxis]])
    fx_prob = np.hstack([fx_prob, np.full([n, 1], 100)])

    # indicator function (observations matched to fx shape: (n,) => (n, 1)):
    # - left of the obs is 0.0
    # - obs and right of the obs is 1.0
    o = np.where(fx >= obs[:, np.newaxis], 1.0, 0.0)

    # correct behavior when obs > max fx:
    # - should be 0 over range: max fx < x < obs
    o[idx, -1] = 0.0

    # forecast probabilities [unitless]
    f = fx_prob / 100.0

    # np.trapz is deprecated since NumPy 2.0 in favor of np.trapezoid;
    # use whichever this NumPy version provides
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz

    # integrate along each sample, then average all samples
    crps = np.mean(trapezoid((f - o) ** 2, x=fx, axis=1))
    return crps
def crps_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """CRPS skill score.

        CRPSS = 1 - CRPS_fx / CRPS_ref

    where CRPS_fx is the CRPS of the evaluated forecast and CRPS_ref is the
    CRPS of a reference forecast.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n, d) array_like
        Forecasts (physical units) of the right-hand-side of a CDF with d
        intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is interpreted as
        <= 10 MW, <= 20 MW, <= 30 MW.
    fx_prob : (n, d) array_like
        Probability [%] associated with the forecasts.
    ref : (n, d) array_like
        Reference forecasts (physical units) of the right-hand-side of a CDF
        with d intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is
        interpreted as <= 10 MW, <= 20 MW, <= 30 MW.
    ref_prob : (n, d) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The CRPS skill score [unitless]. NaN if `ref` is a scalar, 0 if
        CRPS_fx and CRPS_ref are equal, and negative infinity if only
        CRPS_ref is 0.

    See Also
    --------
    :py:func:`solarforecastarbiter.metrics.probabilistic.continuous_ranked_probability_score`

    """
    # a scalar reference cannot describe a CDF, so no skill can be computed
    if np.isscalar(ref):
        return np.nan

    crps_fx = continuous_ranked_probability_score(obs, fx, fx_prob)
    crps_ref = continuous_ranked_probability_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if crps_fx == crps_ref:
        return 0.0
    elif crps_ref == 0.0:
        # avoid divide by zero
        # (np.NINF was removed in NumPy 2.0, so spell it as -np.inf)
        return -np.inf
    else:
        return 1 - crps_fx / crps_ref
# Registry mapping each metric shorthand to (function, display label).
# Add new metrics to this map to map shorthand to function.
_MAP = {
    'bs': (brier_score, 'BS'),
    'bss': (brier_skill_score, 'BSS'),
    'rel': (reliability, 'REL'),
    'res': (resolution, 'RES'),
    'unc': (uncertainty, 'UNC'),
    'qs': (quantile_score, 'QS'),
    'qss': (quantile_skill_score, 'QSS'),
    # 'sh': (sharpness, 'SH'),  # TODO
    'crps': (continuous_ranked_probability_score, 'CRPS'),
    'crpss': (crps_skill_score, 'CRPSS'),
}

# Public API: the names of every registered metric function
__all__ = [metric_func.__name__ for metric_func, _label in _MAP.values()]

# Functions that require a reference forecast
_REQ_REF_FX = ['bss', 'qss', 'crpss']

# Functions that require normalized factor
_REQ_NORM = []

# Functions that require full distribution forecasts (as 2dim)
_REQ_DIST = ['crps', 'crpss']

# TODO: Functions that require two forecasts (e.g., sharpness)
# _REQ_FX_FX = ['sh']