Source code for solarforecastarbiter.metrics.probabilistic

"""Probabilistic forecast error metrics."""

import numpy as np


def brier_score(obs, fx, fx_prob):
    """Brier Score (BS).

        BS = 1/n sum_{i=1}^n (f_i - o_i)^2

    where n is the number of forecasts, f_i is the forecasted probability of
    event i, and o_i is the observed event indicator (o_i=0: event did not
    occur, o_i=1: event occurred). The forecasts are supplied as the
    right-hand-side of a CDF interval, e.g., forecast <= 10 MW at time i, and
    therefore o_i is defined as:

        o_i = 1 if obs_i <= fx_i, else o_i = 0

    where fx_i and obs_i are the forecast and observation at time i,
    respectively.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    bs : float
        The Brier Score [unitless], bounded between 0 and 1, where values
        closer to 0 indicate better forecast performance and values closer to 1
        indicate worse performance.

    Notes
    -----
    The Brier Score implemented in this function is for binary outcomes only,
    rather than the more general (but less commonly used) categorical version.

    Inputs are converted with :py:func:`numpy.asarray` and compared
    element-wise by position, so plain Python sequences are accepted.

    """

    # Coerce to arrays: with plain lists `obs <= fx` would be a single
    # lexicographic comparison and `fx_prob / 100.0` would raise TypeError.
    obs = np.asarray(obs)
    fx = np.asarray(fx)
    fx_prob = np.asarray(fx_prob)

    # event: 0=did not happen, 1=did happen
    o = np.where(obs <= fx, 1.0, 0.0)

    # forecast probabilities [unitless]
    f = fx_prob / 100.0

    bs = np.mean((f - o) ** 2)
    return bs


def brier_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """Brier Skill Score (BSS).

        BSS = 1 - BS_fx / BS_ref

    where BS_fx is the Brier Score of the evaluated forecast and BS_ref is the
    Brier Score of a reference forecast.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.
    ref : (n,) array_like
        Reference forecast (physical units) of the right-hand-side of a CDF
        interval.
    ref_prob : (n,) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The Brier Skill Score [unitless].

    Notes
    -----
    This function returns 0 if BS_fx and BS_ref are equal (including the case
    where both are 0), and negative infinity if only BS_ref is 0.

    """
    bs_fx = brier_score(obs, fx, fx_prob)
    bs_ref = brier_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if bs_fx == bs_ref:
        return 0.0
    elif bs_ref == 0.0:
        # avoid divide by 0: a perfect reference cannot be improved upon
        return -np.inf
    else:
        return 1.0 - bs_fx / bs_ref


def quantile_score(obs, fx, fx_prob):
    """Quantile Score (QS).

    .. math::

        \\text{QS} = \\frac{1}{n} \\sum_{i=1}^n (fx_i - obs_i) * (p - 1\\{obs_i > fx_i\\})

    where :math:`n` is the number of forecasts, :math:`obs_i` is an
    observation, :math:`fx_i` is a forecast, :math:`1\\{obs_i > fx_i\\}` is an
    indicator function (1 if :math:`obs_i > fx_i`, 0 otherwise) and :math:`p`
    is the probability that :math:`obs_i <= fx_i`. [1]_ [2]_

    If :math:`obs > fx`, then we have:

    .. math::

        (fx - obs) < 0 \\\\
        (p - 1\\{obs > fx\\}) = (p - 1) <= 0 \\\\
        (fx - obs) * (p - 1) >= 0

    If instead :math:`obs < fx`, then we have:

    .. math::

        (fx - obs) > 0 \\\\
        (p - 1\\{obs > fx\\}) = (p - 0) >= 0 \\\\
        (fx - obs) * p >= 0

    Therefore, the quantile score is non-negative regardless of the obs and fx.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    qs : float
        The Quantile Score, with the same units as the observations.

    Notes
    -----
    Quantile score is meant to be computed for a single probability of
    :math:`n` samples.

    Inputs are converted with :py:func:`numpy.asarray` and combined
    element-wise by position, so plain Python sequences are accepted.

    Examples
    --------
    >>> obs = 100     # observation [MW]
    >>> fx = 80       # forecast [MW]
    >>> fx_prob = 60  # probability [%]
    >>> quantile_score(obs, fx, fx_prob)   # score [MW]
    8.0

    References
    ----------
    .. [1] Koenker and Bassett, Jr. (1978) "Regression Quantiles", Econometrica
       46 (1), pp. 33-50. doi: 10.2307/1913643
    .. [2] Wilks (2020) "Forecast Verification". In "Statistical Methods in the
       Atmospheric Sciences" (3rd edition). Academic Press. ISBN: 9780123850225

    """  # NOQA: E501,W605

    # Coerce to arrays: plain lists do not support `fx - obs` or
    # `fx_prob / 100.0` and would raise TypeError.
    obs = np.asarray(obs)
    fx = np.asarray(fx)
    fx_prob = np.asarray(fx_prob)

    # Prob(obs <= fx) = p
    p = fx_prob / 100.0

    # indicator: 1 where the observation exceeds the forecast, else 0
    exceeded = np.where(obs > fx, 1.0, 0.0)

    qs = np.mean((fx - obs) * (p - exceeded))
    return qs


def quantile_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """Quantile Skill Score (QSS).

    .. math::

        \\text{QSS} = 1 - \\text{QS}_{\\text{fx}} / \\text{QS}_{\\text{ref}}

    where :math:`\\text{QS}_{\\text{fx}}` is the Quantile Score of the
    evaluated forecast and :math:`\\text{QS}_{\\text{ref}}` is the Quantile
    Score of a reference forecast. [1]_

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.
    ref : (n,) array_like
        Reference forecast (physical units) of the right-hand-side of a CDF
        interval.
    ref_prob : (n,) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The Quantile Skill Score [unitless].

    See Also
    --------
    :py:func:`solarforecastarbiter.metrics.probabilistic.quantile_score`

    Notes
    -----
    This function returns 0 if QS_fx and QS_ref are equal (including the case
    where both are 0), and negative infinity if only QS_ref is 0.

    References
    ----------
    .. [1] Bouallegue, Pinson and Friederichs (2015) "Quantile forecast
       discrimination ability and value", Quarterly Journal of the Royal
       Meteorological Society 141, pp. 3415-3424. doi: 10.1002/qj.2624

    """

    qs_fx = quantile_score(obs, fx, fx_prob)
    qs_ref = quantile_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if qs_fx == qs_ref:
        return 0.0
    elif qs_ref == 0.0:
        # avoid divide by 0
        # typically caused by deadbands and short time periods
        # (-np.inf rather than np.NINF, which was removed in NumPy 2.0)
        return -np.inf
    else:
        return 1.0 - qs_fx / qs_ref


def _unique_forecasts(f):
    """Convert forecast probabilities to a set of unique values.

    Determine a set of unique forecast probabilities, based on input forecast
    probabilities of arbitrary precision, and approximate the input
    probabilities to lie within the set of unique values.

    Parameters
    ----------
    f : (n,) array_like
        Probability [unitless] associated with the forecasts.

    Returns
    -------
    f_uniq : (n,) array_like
        The converted forecast probabilities [unitless].

    Notes
    -----
    This implementation determines the set of unique forecast probabilities by
    rounding the input probabilities to a precision determined by the number of
    input probability values: if less than 1000 samples, bin by tenths;
    otherwise bin by hundredths.

    Examples
    --------
    >>> f = np.array([0.1234, 0.156891, 0.10561])
    >>> _unique_forecasts(f)
    array([0.1, 0.2, 0.1])

    """

    if len(f) >= 1000:
        n_decimals = 2  # bin by hundredths (0.01, 0.02, etc.)
    else:
        n_decimals = 1  # bin by tenths (0.1, 0.2, etc.)

    f_uniq = np.around(f, decimals=n_decimals)
    return f_uniq


def brier_decomposition(obs, fx, fx_prob):
    """The 3-component decomposition of the Brier Score.

        BS = REL - RES + UNC

    where REL is the reliability, RES is the resolution and UNC is the
    uncertainty.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    rel : float
        The reliability of the forecast [unitless], where a perfectly reliable
        forecast has value of 0.
    res : float
        The resolution of the forecast [unitless], where higher values are
        better.
    unc : float
        The uncertainty [unitless], where lower values indicate the event being
        forecasted occurs rarely.

    Notes
    -----
    The current implementation iterates over the unique forecasts to compute
    the reliability and resolution, rather than using a vectorized formulation.
    While a vectorized formulation may be more computationally efficient, the
    clarity of the iterative version outweighs the efficiency gains from the
    vectorized version. Additionally, the forecast probabilities are binned to
    tenths or hundredths before iterating, so the number of unique forecasts is
    small enough that there is likely no practical difference in computation
    time between the iterative and vectorized versions.

    Inputs are converted with :py:func:`numpy.asarray` and compared
    element-wise by position, so plain Python sequences are accepted.

    """

    # Coerce to arrays: with plain lists `obs <= fx` would be a single
    # lexicographic comparison and `fx_prob / 100.0` would raise TypeError.
    obs = np.asarray(obs)
    fx = np.asarray(fx)
    fx_prob = np.asarray(fx_prob)

    # event: 0=did not happen, 1=did happen
    o = np.where(obs <= fx, 1.0, 0.0)

    # forecast probabilities [unitless], binned to a set of unique values
    f = _unique_forecasts(fx_prob / 100.0)

    n = len(f)
    o_avg = np.mean(o)  # base rate: mean event value over all samples

    # reliability and resolution, accumulated per unique forecast value
    rel, res = 0.0, 0.0
    f_values, counts = np.unique(f, return_counts=True)
    for f_i, n_i in zip(f_values, counts):
        o_i = np.mean(o[f == f_i])      # mean event value per set
        rel += n_i * (f_i - o_i) ** 2
        res += n_i * (o_i - o_avg) ** 2
    rel /= n
    res /= n

    # uncertainty depends only on the base rate of the observed event
    unc = o_avg * (1.0 - o_avg)

    return rel, res, unc


def reliability(obs, fx, fx_prob):
    """Reliability (REL) of the forecast.

        REL = 1/n sum_{i=1}^I N_i (f_i - o_{i,avg})^2

    where n is the total number of forecasts, I is the number of unique
    forecasts (f_1, f_2, ..., f_I), N_i is the number of times each unique
    forecast occurs, and o_{i,avg} is the average of the observed events
    during which the forecast was f_i.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    rel : float
        The reliability of the forecast [unitless], where a perfectly reliable
        forecast has value of 0.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """

    # reliability is the first of the three decomposition components
    rel, _, _ = brier_decomposition(obs, fx, fx_prob)
    return rel


def resolution(obs, fx, fx_prob):
    """Resolution (RES) of the forecast.

        RES = 1/n sum_{i=1}^I N_i (o_{i,avg} - o_{avg})^2

    where n is the total number of forecasts, I is the number of unique
    forecasts (f_1, f_2, ..., f_I), N_i is the number of times each unique
    forecast occurs, o_{i,avg} is the average of the observed events during
    which the forecast was f_i, and o_{avg} is the average of all observed
    events.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    res : float
        The resolution of the forecast [unitless], where higher values are
        better.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """

    # resolution is the second of the three decomposition components
    _, res, _ = brier_decomposition(obs, fx, fx_prob)
    return res


def uncertainty(obs, fx, fx_prob):
    """Uncertainty (UNC) of the forecast.

        UNC = base_rate * (1 - base_rate)

    where base_rate = 1/n sum_{i=1}^n o_i, and o_i is the observed event.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n,) array_like
        Forecasts (physical units) of the right-hand-side of a CDF interval,
        e.g., fx = 10 MW is interpreted as forecasting <= 10 MW.
    fx_prob : (n,) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    unc : float
        The uncertainty [unitless], where lower values indicate the event being
        forecasted occurs rarely.

    See Also
    --------
    brier_decomposition : 3-component decomposition of the Brier Score

    """

    # uncertainty is the third of the three decomposition components
    _, _, unc = brier_decomposition(obs, fx, fx_prob)
    return unc


def sharpness(fx_lower, fx_upper):
    """Sharpness (SH).

        SH = 1/n sum_{i=1}^n (f_{u,i} - f_{l,i})

    where n is the total number of forecasts, f_{u,i} is the upper prediction
    interval value and f_{l,i} is the lower prediction interval value for
    sample i.

    Parameters
    ----------
    fx_lower : (n,) array_like
        The lower prediction interval values (physical units).
    fx_upper : (n,) array_like
        The upper prediction interval values (physical units).

    Returns
    -------
    sh : float
        The sharpness (physical units), where smaller sharpness values indicate
        "tighter" prediction intervals.

    Notes
    -----
    Inputs are converted with :py:func:`numpy.asarray` and subtracted
    element-wise by position, so plain Python sequences are accepted.

    """
    # Coerce to arrays: plain lists do not support `-` and would raise
    # TypeError.
    fx_lower = np.asarray(fx_lower)
    fx_upper = np.asarray(fx_upper)

    sh = np.mean(fx_upper - fx_lower)
    return sh


def continuous_ranked_probability_score(obs, fx, fx_prob):
    """Continuous Ranked Probability Score (CRPS).

    .. math::

        \\text{CRPS} = \\frac{1}{n} \\sum_{i=1}^n \\int_{-\\infty}^{\\infty}
        (F_i(x) - \\mathbf{1} \\{x \\geq y_i \\})^2 dx

    where :math:`F_i(x)` is the CDF of the forecast at time :math:`i`,
    :math:`y_i` is the observation at time :math:`i`, and :math:`\\mathbf{1}`
    is the indicator function that transforms the observation into a step
    function (1 if :math:`x \\geq y`, 0 if :math:`x < y`). In other words, the
    CRPS measures the difference between the forecast CDF and the empirical CDF
    of the observation. The CRPS has the same units as the observation. Lower
    CRPS values indicate more accurate forecasts, where a CRPS of 0 indicates a
    perfect forecast. [1]_ [2]_ [3]_

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n, d) array_like
        Forecasts (physical units) of the right-hand-side of a CDF with d
        intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is interpreted as
        <= 10 MW, <= 20 MW, <= 30 MW.
    fx_prob : (n, d) array_like
        Probability [%] associated with the forecasts.

    Returns
    -------
    crps : float
        The Continuous Ranked Probability Score, with the same units as the
        observation.

    Raises
    ------
    ValueError
        If the forecasts have incorrect dimensions; either a) the forecasts are
        for a single sample (n=1) with d CDF intervals but are given as a 1D
        array with d values or b) the forecasts are given as 2D arrays (n,d)
        but do not contain at least 2 CDF intervals (i.e. d < 2).

    Notes
    -----
    The CRPS can be calculated analytically when the forecast CDF is of a
    continuous parametric distribution, e.g., Gaussian distribution. However,
    since the Solar Forecast Arbiter makes no assumptions regarding how a
    probabilistic forecast was generated, the CRPS is instead calculated using
    numerical integration of the discretized forecast CDF. Therefore, the
    accuracy of the CRPS calculation is limited by the precision of the
    forecast CDF. In practice, this means the forecast CDF should 1) consist of
    at least 10 intervals and 2) cover probabilities from 0% to 100%.

    Inputs are converted with :py:func:`numpy.asarray`, so nested Python
    sequences are accepted.

    References
    ----------
    .. [1] Matheson and Winkler (1976) "Scoring rules for continuous
           probability distributions." Management Science, vol. 22, pp.
           1087-1096. doi: 10.1287/mnsc.22.10.1087
    .. [2] Hersbach (2000) "Decomposition of the continuous ranked probability
           score for ensemble prediction systems." Weather Forecast, vol. 15,
           pp. 559-570. doi: 10.1175/1520-0434(2000)015<0559:DOTCRP>2.0.CO;2
    .. [3] Wilks (2019) "Statistical Methods in the Atmospheric Sciences", 4th
           ed. Oxford; Waltham, MA; Academic Press.

    """

    # Coerce to arrays: the slicing below (`fx[:, 0]`, `obs[:, np.newaxis]`)
    # is not supported by plain Python sequences.
    obs = np.asarray(obs)
    fx = np.asarray(fx)
    fx_prob = np.asarray(fx_prob)

    # validate forecast shape: must be (n, d) with d >= 2
    if fx.ndim < 2:
        raise ValueError("forecasts must be 2D arrays (expected (n,d), got "
                         f"{fx.shape})")
    elif fx.shape[1] < 2:
        raise ValueError("forecasts must have d >= 2 CDF intervals "
                         f"(expected >= 2, got {fx.shape[1]})")

    n = len(fx)

    # extend CDF min to ensure obs within forecast support
    # fx.shape = (n, d) ==> (n, d + 1)
    fx_min = np.minimum(obs, fx[:, 0])
    fx = np.hstack([fx_min[:, np.newaxis], fx])
    fx_prob = np.hstack([np.zeros([n, 1]), fx_prob])

    # extend CDF max to ensure obs within forecast support
    # fx.shape = (n, d + 1) ==> (n, d + 2)
    idx = (fx[:, -1] < obs)
    fx_max = np.maximum(obs, fx[:, -1])
    fx = np.hstack([fx, fx_max[:, np.newaxis]])
    fx_prob = np.hstack([fx_prob, np.full([n, 1], 100)])

    # indicator function:
    # - left of the obs is 0.0
    # - obs and right of the obs is 1.0
    o = np.where(fx >= obs[:, np.newaxis], 1.0, 0.0)

    # correct behavior when obs > max fx:
    # - should be 0 over range: max fx < x < obs
    o[idx, -1] = 0.0

    # forecast probabilities [unitless]
    f = fx_prob / 100.0

    # np.trapz was renamed np.trapezoid in NumPy 2.0 (old name deprecated);
    # prefer the new name when present and fall back for older NumPy
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # integrate along each sample, then average all samples
    crps = np.mean(integrate((f - o) ** 2, x=fx, axis=1))

    return crps


def crps_skill_score(obs, fx, fx_prob, ref, ref_prob):
    """CRPS skill score.

        CRPSS = 1 - CRPS_fx / CRPS_ref

    where CRPS_fx is the CRPS of the evaluated forecast and CRPS_ref is the
    CRPS of a reference forecast.

    Parameters
    ----------
    obs : (n,) array_like
        Observations (physical unit).
    fx : (n, d) array_like
        Forecasts (physical units) of the right-hand-side of a CDF with d
        intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is interpreted as
        <= 10 MW, <= 20 MW, <= 30 MW.
    fx_prob : (n, d) array_like
        Probability [%] associated with the forecasts.
    ref : (n, d) array_like
        Reference forecasts (physical units) of the right-hand-side of a CDF
        with d intervals (d >= 2), e.g., fx = [10 MW, 20 MW, 30 MW] is
        interpreted as <= 10 MW, <= 20 MW, <= 30 MW.
    ref_prob : (n, d) array_like
        Probability [%] associated with the reference forecast.

    Returns
    -------
    skill : float
        The CRPS skill score [unitless].

    See Also
    --------
    :py:func:`solarforecastarbiter.metrics.probabilistic.continuous_ranked_probability_score`

    Notes
    -----
    This function returns NaN if `ref` is a scalar, 0 if CRPS_fx and CRPS_ref
    are equal (including the case where both are 0), and negative infinity if
    only CRPS_ref is 0.

    """

    # a scalar reference cannot be evaluated as a CDF
    if np.isscalar(ref):
        return np.nan

    crps_fx = continuous_ranked_probability_score(obs, fx, fx_prob)
    crps_ref = continuous_ranked_probability_score(obs, ref, ref_prob)

    # avoid 0 / 0 --> nan
    if crps_fx == crps_ref:
        return 0.0
    elif crps_ref == 0.0:
        # avoid divide by zero
        # (-np.inf rather than np.NINF, which was removed in NumPy 2.0)
        return -np.inf
    else:
        return 1 - crps_fx / crps_ref


# Registry of metrics: shorthand key -> (metric function, display name).
# Add new metrics to this map to make them available by shorthand.
_MAP = {
    'bs': (brier_score, 'BS'),
    'bss': (brier_skill_score, 'BSS'),
    'rel': (reliability, 'REL'),
    'res': (resolution, 'RES'),
    'unc': (uncertainty, 'UNC'),
    'qs': (quantile_score, 'QS'),
    'qss': (quantile_skill_score, 'QSS'),
    # 'sh': (sharpness, 'SH'),  # TODO
    'crps': (continuous_ranked_probability_score, 'CRPS'),
    'crpss': (crps_skill_score, 'CRPSS'),
}

# Public API: the names of the registered metric functions
__all__ = [metric.__name__ for metric, _label in _MAP.values()]

# Shorthands of metrics that require a reference forecast
_REQ_REF_FX = ['bss', 'qss', 'crpss']

# Shorthands of metrics that require a normalization factor
_REQ_NORM = []

# Shorthands of metrics that require full distribution forecasts (as 2dim)
_REQ_DIST = ['crps', 'crpss']

# TODO: Functions that require two forecasts (e.g., sharpness)
# _REQ_FX_FX = ['sh']