"""
Data classes and acceptable variables as defined by the SolarForecastArbiter
Data Model document. Python 3.7 is required.
"""
from dataclasses import (dataclass, field, fields, MISSING, asdict,
                         replace, is_dataclass)
import datetime
import itertools
import json
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from typing import Optional, Tuple, Union, ClassVar


import numpy as np
import pandas as pd


from solarforecastarbiter.metrics.deterministic import \
    _MAP as deterministic_mapping
from solarforecastarbiter.metrics.deterministic import \
    _FILL_OPTIONS, _COST_FUNCTION_MAP, _AGG_OPTIONS
from solarforecastarbiter.metrics.event import _MAP as event_mapping
from solarforecastarbiter.metrics.probabilistic import \
    _MAP as probabilistic_mapping
from solarforecastarbiter.metrics.summary import (
    _DETERMINISTIC_MAP as summary_deterministic_mapping,
    _EVENT_MAP as summary_event_mapping,
)
from solarforecastarbiter.validation.quality_mapping import \
    DESCRIPTION_MASK_MAPPING, DERIVED_MASKS


DASH_URL = 'https://dashboard.solarforecastarbiter.org'
ALLOWED_VARIABLES = {
    'air_temperature': 'degC',
    'wind_speed': 'm/s',
    'ghi': 'W/m^2',
    'dni': 'W/m^2',
    'dhi': 'W/m^2',
    'poa_global': 'W/m^2',
    'relative_humidity': '%',
    'ac_power': 'MW',
    'dc_power': 'MW',
    'availability': '%',
    'curtailment': 'MW',
    'event': 'boolean',
    'load': 'MW',
    'net_load': 'MW'
}


COMMON_NAMES = {
    'air_temperature': 'Air Temperature',
    'wind_speed': 'Wind Speed',
    'ghi': 'GHI',
    'dni': 'DNI',
    'dhi': 'DHI',
    'poa_global': 'Plane of Array Irradiance',
    'relative_humidity': 'Relative Humidity',
    'ac_power': 'AC Power',
    'dc_power': 'DC Power',
    'availability': 'Availability',
    'curtailment': 'Curtailment',
    'event': 'Event',
    'load': 'Load',
    'net_load': 'Net Load',
}


CLOSED_MAPPING = {
    'event': None,
    'instant': None,
    'beginning': 'left',
    'ending': 'right'
}
ALLOWED_INTERVAL_LABELS = tuple(CLOSED_MAPPING.keys())
ALLOWED_INTERVAL_VALUE_TYPES = (
    'interval_mean', 'interval_max', 'interval_min', 'interval_median',
    'instantaneous')
ALLOWED_AGGREGATE_TYPES = ('sum', 'mean', 'median', 'max', 'min', 'std')


# Keys are the categories passed to pandas groupby, values are the human
# readable versions for plotting and forms.
ALLOWED_CATEGORIES = {
    'total': 'Total',
    'year': 'Year',
    'season': 'Season',
    'month': 'Month of the year',
    'hour': 'Hour of the day',
    'date': 'Date',
    'weekday': 'Day of the week'
}


# sentences/paragraphs that will appear in the report
# under the heading in the key
CATEGORY_BLURBS = {
    'total': "Metric totals for the entire selected period.",
    'year': "Metrics per year.",
    'season': "Metrics per season.",
    'month': "Metrics per month.",
    'hour': "Metrics per hour of the day.",
    'date': "Metrics per individual date.",
    'weekday': "Metrics per day of the week."
}


ALLOWED_DETERMINISTIC_METRICS = {
    k: v[1] for k, v in deterministic_mapping.items()}

ALLOWED_EVENT_METRICS = {k: v[1] for k, v in event_mapping.items()}

ALLOWED_PROBABILISTIC_METRICS = {
    k: v[1] for k, v in probabilistic_mapping.items()}

ALLOWED_METRICS = ALLOWED_DETERMINISTIC_METRICS.copy()
ALLOWED_METRICS.update(ALLOWED_PROBABILISTIC_METRICS)
ALLOWED_METRICS.update(ALLOWED_EVENT_METRICS)

ALLOWED_DETERMINISTIC_SUMMARY_STATISTICS = {
    k: v[1] for k, v in summary_deterministic_mapping.items()
}
ALLOWED_EVENT_SUMMARY_STATISTICS = {
    k: v[1] for k, v in summary_event_mapping.items()
}
ALLOWED_SUMMARY_STATISTICS = ALLOWED_DETERMINISTIC_SUMMARY_STATISTICS.copy()
ALLOWED_SUMMARY_STATISTICS.update(ALLOWED_EVENT_SUMMARY_STATISTICS)


ALLOWED_COST_FUNCTIONS = tuple(_COST_FUNCTION_MAP.keys())
ALLOWED_COST_AGG_OPTIONS = tuple(_AGG_OPTIONS.keys())
ALLOWED_COST_FILL_OPTIONS = tuple(_FILL_OPTIONS.keys())

ALLOWED_QUALITY_FLAGS = tuple([
    k for k in DESCRIPTION_MASK_MAPPING.keys()
    if not k.startswith('VERSION IDENTIFIER') and
    not k.startswith("RESERVED")
]) + tuple(DERIVED_MASKS.keys())


def _time_conv(inp):
    if isinstance(inp, datetime.time):
        return inp.strftime('%H:%M')
    elif isinstance(inp, datetime.datetime):
        return inp.isoformat()
    elif isinstance(inp, pd.Timedelta):
        # convert to integer minutes
        return inp.total_seconds() // 60
    else:
        return inp


def _dict_factory(inp):
    dict_ = {}
    for k, v in dict(inp).items():
        if isinstance(v, tuple):
            dict_[k] = tuple(_time_conv(i) for i in v)
        elif isinstance(v, list):  # pragma: no cover
            dict_[k] = [_time_conv(i) for i in v]
        else:
            dict_[k] = _time_conv(v)
    if 'units' in dict_:
        del dict_['units']
    if 'constant_value_units' in dict_:
        del dict_['constant_value_units']
    if 'data_object' in dict_:
        del dict_['data_object']
    return dict_


def _single_field_processing(model, field, val, field_type=None):
    type_ = field_type or field.type
    if (
            # If the value is already the right type, return
            # typing type_s do not work with isinstance, so check __origin__
            not hasattr(type_, '__origin__') and
            isinstance(val, type_)
    ):
        return val
    elif type_ == pd.Timedelta:
        return pd.Timedelta(f'{val}min')
    elif type_ == pd.Timestamp:
        out = pd.Timestamp(val)
        if pd.isna(out):
            raise ValueError(f'{val} is not a time')
        return out
    elif type_ == datetime.time:
        return datetime.datetime.strptime(val, '%H:%M').time()
    elif (
            is_dataclass(type_) and
            isinstance(val, dict)
    ):
        return type_.from_dict(val)
    elif (
            hasattr(type_, '__origin__') and
            type_.__origin__ is Union
    ):
        # with a Union, we must return the right type
        for ntype in type_.__args__:
            try:
                processed_val = _single_field_processing(
                    model, field, val, ntype
                )
            except (TypeError, ValueError, KeyError):
                continue
            else:
                if not isinstance(processed_val, ntype):
                    continue
                else:
                    return processed_val
        raise TypeError(f'Unable to process {val} as one of {type_.__args__}')
    else:
        return model._special_field_processing(
            model, field, val)


class BaseModel:
    def _special_field_processing(self, model_field, val):
        return val

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        """
        Construct a dataclass from the given dict, matching keys with
        the class fields. A KeyError is raised for any missing values.
        If raise_on_extra is True, an error is raised if keys of the
        dict are also not fields of the dataclass. For pandas.Timedelta
        model fields, it is assumed input_dict contains a number
        representing minutes. For datetime.time model fields,
        input_dict values are assumed to be strings in the %H:%M format.
        If a modeling_parameters field is present, the
        modeling_parameters key from input_dict is automatically parsed
        into the appropriate PVModelingParameters subclass based on
        tracking_type.

        Parameters
        ----------
        input_dict : dict
            The dict to process into dataclass fields
        raise_on_extra : boolean, default False
            If True, raise an exception on extra keys in input_dict
            that are not dataclass fields.

        Returns
        -------
        model : subclass of BaseModel
            Instance of the desired model.

        Raises
        ------
        KeyError
            For missing required fields or if raise_on_extra is True
            and input_dict contains extra keys.
        ValueError
            If a pandas.Timedelta, pandas.Timestamp, datetime.time, or
            modeling_parameters field cannot be parsed from the
            input_dict
        TypeError
            If the field has a Union type and the input parameter is
            not processed into one of the Union arguments
        """
        dict_ = input_dict.copy()
        model_fields = fields(model)
        kwargs = {}
        errors = []
        for model_field in model_fields:
            if model_field.name in dict_:
                field_val = dict_[model_field.name]
                if (
                        hasattr(model_field.type, '__origin__') and
                        model_field.type.__origin__ is tuple
                ):
                    out = []
                    default_type = model_field.type.__args__[0]
                    for i, arg in enumerate(field_val):
                        if (
                                i < len(model_field.type.__args__) and
                                model_field.type.__args__[i] is not Ellipsis
                        ):
                            this_type = model_field.type.__args__[i]
                        else:
                            this_type = default_type
                        out.append(
                            _single_field_processing(
                                model, model_field, arg, this_type))
                    kwargs[model_field.name] = tuple(out)
                else:
                    kwargs[model_field.name] = _single_field_processing(
                        model, model_field, field_val)
            elif (
                    model_field.default is MISSING and
                    model_field.default_factory is MISSING and
                    model_field.init
            ):
                errors.append(model_field.name)
        if errors:
            raise KeyError(
                'Missing the following required arguments for the model '
                f'{str(model)}: {", ".join(errors)}')
        names = [f.name for f in model_fields]
        extra = [k for k in dict_.keys() if k not in names]
        if extra and raise_on_extra:
            raise KeyError(
                f'Extra keys for the model {str(model)}: {", ".join(extra)}')
        return model(**kwargs)
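    # A usage sketch (hypothetical values): keys map to fields, and
    # missing required fields raise a KeyError:
    #
    #   >>> Site.from_dict({'name': 'Desert Rock', 'latitude': 36.62373,
    #   ...                 'longitude': -116.01947, 'elevation': 1007.0,
    #   ...                 'timezone': 'Etc/GMT+8'})
    #   Site(name='Desert Rock', ...)
    #   >>> Site.from_dict({'name': 'incomplete'})  # raises KeyError
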
    def to_dict(self):
        """
        Convert the dataclass into a dictionary suitable for uploading
        to the API. This means some types (such as pandas.Timedelta and
        times) are converted to strings.
        """
        # using the dict_factory recurses through all objects for
        # special conversions
        dict_ = asdict(self, dict_factory=_dict_factory)
        return dict_
    def replace(self, **kwargs):
        """
        Convenience wrapper for :py:func:`dataclasses.replace` to
        create a new dataclass from the old with the given keys
        replaced.
        """
        return replace(self, **kwargs)


@dataclass(frozen=True)
class Site(BaseModel):
    """
    Class for keeping track of Site metadata.

    Parameters
    ----------
    name : str
        Name of the Site, e.g. Desert Rock
    latitude : float
        Latitude of the Site in decimal degrees north of the equator,
        e.g. 36.62373
    longitude : float
        Longitude of the Site in decimal degrees east of the prime
        meridian, e.g. -116.01947
    elevation : float
        Elevation of the Site in meters above mean sea level, e.g. 1007
    timezone : str
        IANA timezone of the Site, e.g. Etc/GMT+8
    site_id : str, optional
        UUID of the Site in the API
    provider : str, optional
        Provider of the Site information.
    extra_parameters : str, optional
        The extra parameters may be used by forecasters when
        implementing other PV models. The framework does not provide a
        standard set of extra parameters or require a particular
        format; these are up to the site owner.
    climate_zones : tuple of str, optional
        The climate zones that the site is within
    """
    name: str
    latitude: float
    longitude: float
    elevation: float
    timezone: str
    site_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    climate_zones: Tuple[str, ...] = ()

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if 'modeling_parameters' in dict_:
            mp_dict = dict_.get('modeling_parameters', {})
            if not isinstance(mp_dict, PVModelingParameters):
                tracking_type = mp_dict.get('tracking_type', None)
                if tracking_type == 'fixed':
                    dict_['modeling_parameters'] = (
                        FixedTiltModelingParameters.from_dict(
                            mp_dict))
                    return SolarPowerPlant.from_dict(dict_, raise_on_extra)
                elif tracking_type == 'single_axis':
                    dict_['modeling_parameters'] = (
                        SingleAxisModelingParameters.from_dict(
                            mp_dict))
                    return SolarPowerPlant.from_dict(dict_, raise_on_extra)
                elif tracking_type is not None:
                    raise ValueError(
                        'tracking_type must be None, fixed, or '
                        'single_axis')
        return super().from_dict(dict_, raise_on_extra)


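# A dispatch sketch (hypothetical values): a dict whose
# modeling_parameters includes a tracking_type is parsed into a
# SolarPowerPlant with the matching PVModelingParameters subclass:
#
#   >>> plant = Site.from_dict({
#   ...     'name': 'Plant X', 'latitude': 32.2, 'longitude': -110.9,
#   ...     'elevation': 700.0, 'timezone': 'America/Phoenix',
#   ...     'modeling_parameters': {
#   ...         'tracking_type': 'fixed', 'surface_tilt': 30.0,
#   ...         'surface_azimuth': 180.0, 'ac_capacity': 10.0,
#   ...         'dc_capacity': 12.0, 'temperature_coefficient': -0.4,
#   ...         'dc_loss_factor': 0.0, 'ac_loss_factor': 0.0}})
#   >>> type(plant).__name__
#   'SolarPowerPlant'

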
@dataclass(frozen=True)
class PVModelingParameters(BaseModel):
    """
    Class for keeping track of generic PV modeling parameters

    Parameters
    ----------
    ac_capacity : float
        Nameplate AC power rating in megawatts
    dc_capacity : float
        Nameplate DC power rating in megawatts
    temperature_coefficient : float
        The temperature coefficient of DC power in units of %/C.
        Typically -0.2 to -0.5 % per degree C.
    dc_loss_factor : float
        Applied to DC current in units of %. 0 = no loss.
    ac_loss_factor : float
        Applied to inverter power output in units of %. 0 = no loss.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.FixedTiltModelingParameters`
    :py:class:`solarforecastarbiter.datamodel.SingleAxisModelingParameters`
    """
    ac_capacity: float
    dc_capacity: float
    temperature_coefficient: float
    dc_loss_factor: float
    ac_loss_factor: float


@dataclass(frozen=True)
class FixedTiltModelingParameters(PVModelingParameters):
    """
    A class based on PVModelingParameters that has additional
    parameters for fixed tilt PV systems.

    Parameters
    ----------
    surface_tilt : float
        Tilt from horizontal of a fixed tilt system, degrees
    surface_azimuth : float
        Azimuth angle of a fixed tilt system, degrees East of North

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.PVModelingParameters`
    """
    surface_tilt: float
    surface_azimuth: float
    tracking_type: str = 'fixed'


@dataclass(frozen=True)
class SingleAxisModelingParameters(PVModelingParameters):
    """
    A class based on PVModelingParameters that has additional
    parameters for single axis tracking systems.

    Parameters
    ----------
    axis_tilt : float
        Tilt from horizontal of the tracker axis, degrees
    axis_azimuth : float
        Azimuth angle of the tracker axis, degrees East of North
    ground_coverage_ratio : float
        Ratio of total width of modules on a tracker to the distance
        between tracker axes. For example, for trackers each with two
        modules of 1m width each, and a spacing between tracker axes of
        7m, the ground coverage ratio is 0.286 (= 2/7).
    backtrack : bool
        Indicates whether the tracking system uses backtracking
    max_rotation_angle : float
        Maximum rotation from horizontal of a single axis tracker,
        degrees

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.PVModelingParameters`
    """
    axis_tilt: float
    axis_azimuth: float
    ground_coverage_ratio: float
    backtrack: bool
    max_rotation_angle: float
    tracking_type: str = 'single_axis'


@dataclass(frozen=True)
class SolarPowerPlant(Site):
    """
    Class for keeping track of metadata associated with solar power
    plant Sites. Adds additional parameters to the Site dataclass.

    Parameters
    ----------
    modeling_parameters : PVModelingParameters
        Modeling parameters for the plant, e.g. a
        FixedTiltModelingParameters or SingleAxisModelingParameters
        instance.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    :py:class:`solarforecastarbiter.datamodel.SingleAxisModelingParameters`
    :py:class:`solarforecastarbiter.datamodel.FixedTiltModelingParameters`
    """
    modeling_parameters: PVModelingParameters = field(
        default_factory=PVModelingParameters)


def __set_units__(cls):
    if cls.variable not in ALLOWED_VARIABLES:
        raise ValueError('variable %s is not allowed' % cls.variable)
    object.__setattr__(cls, 'units', ALLOWED_VARIABLES[cls.variable])


def __generic_oneof__(cls, field, allowed):
    if getattr(cls, field) not in allowed:
        raise ValueError(f'{field} must be one of {allowed}')


def __check_interval_params__(cls):
    __generic_oneof__(
        cls, 'interval_label', ALLOWED_INTERVAL_LABELS)
    __generic_oneof__(
        cls, 'interval_value_type', ALLOWED_INTERVAL_VALUE_TYPES)


@dataclass(frozen=True)
class Observation(BaseModel):
    """
    A class for keeping track of metadata associated with an
    observation. Units are set according to the variable type.

    Parameters
    ----------
    name : str
        Name of the Observation
    variable : str
        Variable name, e.g. power, GHI. Each allowed variable has an
        associated pre-defined unit.
    interval_value_type : str
        The type of the data in the observation. Typically
        interval_mean or instantaneous, but additional types may be
        defined for events.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    interval_label : {'beginning', 'ending', 'instant', 'event'}
        Indicates if a time labels the beginning or the ending of an
        interval average, indicates an instantaneous value, or
        indicates an event.
    site : Site
        The site that this Observation was generated for.
    uncertainty : float or None
        A measure of the uncertainty of the observation values. The
        format will be determined later. None indicates that the
        uncertainty is unknown for this Observation.
    observation_id : str, optional
        UUID of the observation in the API
    provider : str, optional
        Provider of the Observation information.
    extra_parameters : str, optional
        Any extra parameters for the observation

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    """
    __blurb__: ClassVar[str] = 'Observation'
    name: str
    variable: str
    interval_value_type: str
    interval_length: pd.Timedelta
    interval_label: str
    site: Site
    uncertainty: Union[float, None]
    observation_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)

    def __post_init__(self):
        __set_units__(self)
        __check_interval_params__(self)


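# A construction sketch (`site` is an existing Site; values are
# hypothetical). units is not passed; __post_init__ derives it from
# the variable:
#
#   >>> obs = Observation(
#   ...     name='DR GHI', variable='ghi',
#   ...     interval_value_type='interval_mean',
#   ...     interval_length=pd.Timedelta('5min'),
#   ...     interval_label='beginning', site=site, uncertainty=1.0)
#   >>> obs.units
#   'W/m^2'

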
@dataclass(frozen=True)
class AggregateObservation(BaseModel):
    """
    Class for keeping track of an Observation and when it is added and
    (optionally) removed from an Aggregate. This metadata allows the
    Arbiter to calculate the correct quantities while the Aggregate
    grows or shrinks over time.

    Parameters
    ----------
    observation : Observation
        The Observation object that is part of the Aggregate
    effective_from : pandas.Timestamp
        The datetime of when the Observation should be included in the
        Aggregate
    effective_until : pandas.Timestamp
        The datetime of when the Observation should be excluded from
        the Aggregate
    observation_deleted_at : pandas.Timestamp
        The datetime that the Observation was deleted from the Arbiter.
        This indicates that the Observation should be removed from the
        Aggregate, and without the data from this Observation, the
        Aggregate is invalid before this time.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Observation`
    :py:class:`solarforecastarbiter.datamodel.Aggregate`
    """
    __blurb__: ClassVar[str] = 'Aggregate Observation'
    observation: Observation
    effective_from: pd.Timestamp
    effective_until: Union[pd.Timestamp, None] = None
    observation_deleted_at: Union[pd.Timestamp, None] = None


def __check_variable__(variable, *args):
    if not all(arg.variable == variable for arg in args):
        raise ValueError('All variables must be identical.')


def __check_aggregate_interval_compatibility__(interval, *args):
    if any(arg.interval_length > interval for arg in args):
        raise ValueError('observation.interval_length cannot be greater than '
                         'aggregate.interval_length.')
    if any(arg.interval_value_type not in ('interval_mean', 'instantaneous')
           for arg in args):
        raise ValueError('Only observations with interval_value_type of '
                         'interval_mean or instantaneous are acceptable')


@dataclass(frozen=True)
class Aggregate(BaseModel):
    """
    Class for keeping track of Aggregate metadata. Aggregates always
    have interval_value_type of 'interval_mean'.

    Parameters
    ----------
    name : str
        Name of the Aggregate, e.g. Utility X Solar PV
    description : str
        A description of what the aggregate is.
    variable : str
        Variable name, e.g. power, GHI. Each allowed variable has an
        associated pre-defined unit. All observations that make up the
        Aggregate must also have this variable.
    aggregate_type : str
        The aggregation function that will be applied to observations.
        Generally, this will be 'sum' although one might be interested,
        for example, in the 'mean' irradiance of some observations. May
        be an aggregate function string supported by Pandas. Common
        options include ('sum', 'mean', 'min', 'max', 'median', 'std').
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour. This must be >= the interval lengths of any
        Observations that will make up the Aggregate.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average.
    timezone : str
        IANA timezone of the Aggregate, e.g. Etc/GMT+8
    aggregate_id : str, optional
        UUID of the Aggregate in the API
    provider : str, optional
        Provider of the Aggregate information.
    extra_parameters : str, optional
        Any extra parameters for the Aggregate.
    observations : tuple of AggregateObservation
        The Observations that contribute to the Aggregate

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Observation`
    """
    __blurb__: ClassVar[str] = 'Aggregate'
    name: str
    description: str
    variable: str
    aggregate_type: str
    interval_length: pd.Timedelta
    interval_label: str
    timezone: str
    observations: Tuple[AggregateObservation, ...]
    aggregate_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)
    interval_value_type: str = field(default='interval_mean')

    def __post_init__(self):
        __set_units__(self)
        observations = [
            ao.observation for ao in self.observations
            if ao.observation is not None]
        __check_variable__(
            self.variable, *observations)
        __check_aggregate_interval_compatibility__(
            self.interval_length, *observations)
        __generic_oneof__(self, 'aggregate_type', ALLOWED_AGGREGATE_TYPES)
        __generic_oneof__(self, 'interval_label', ('beginning', 'ending'))
        object.__setattr__(self, 'interval_value_type', 'interval_mean')


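# A validation sketch (other fields elided; `obs_1h` is a hypothetical
# Observation with a 1 hour interval_length): member observations with
# an interval_length longer than the Aggregate's are rejected:
#
#   >>> Aggregate(..., interval_length=pd.Timedelta('15min'),
#   ...           observations=(AggregateObservation(
#   ...               observation=obs_1h,
#   ...               effective_from=pd.Timestamp('20200101T0000Z')),))
#   ValueError: observation.interval_length cannot be greater than ...

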
@dataclass(frozen=True)
class _ForecastBase:
    name: str
    issue_time_of_day: datetime.time
    lead_time_to_start: pd.Timedelta
    interval_length: pd.Timedelta
    run_length: pd.Timedelta
    interval_label: str
    interval_value_type: str
    variable: str


@dataclass(frozen=True)
class _ForecastDefaultsBase:
    site: Union[Site, None] = None
    aggregate: Union[Aggregate, None] = None
    forecast_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)


def __site_or_agg__(cls):
    if cls.site is not None and cls.aggregate is not None:
        raise KeyError('Only provide one of "site" or "aggregate" to '
                       'Forecast')
    elif cls.site is None and cls.aggregate is None:
        raise KeyError('Must provide one of "site" or "aggregate" to '
                       'Forecast')


# Follow the MRO pattern in https://stackoverflow.com/a/53085935/2802993
# to avoid problems with inheritance in ProbabilisticForecasts


@dataclass(frozen=True)
class Forecast(BaseModel, _ForecastDefaultsBase, _ForecastBase):
    """
    A class to hold metadata for Forecast objects.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : {'beginning', 'ending', 'instant'}
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI, event. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the Forecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    :py:class:`solarforecastarbiter.datamodel.Aggregate`
    """
    __blurb__: ClassVar[str] = 'Forecast'

    def __post_init__(self):
        __set_units__(self)
        __site_or_agg__(self)
        __check_interval_params__(self)

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != Forecast:
            return super().from_dict(dict_, raise_on_extra)
        if dict_.get('constant_value', None) is not None:
            return ProbabilisticForecastConstantValue.from_dict(
                dict_, raise_on_extra)
        elif dict_.get('constant_values', None) is not None:
            return ProbabilisticForecast.from_dict(dict_, raise_on_extra)
        elif dict_.get('variable') == 'event':
            return EventForecast.from_dict(dict_, raise_on_extra)
        else:
            return super().from_dict(dict_, raise_on_extra)


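# A construction sketch (`site` is an existing Site; values are
# hypothetical). Exactly one of site or aggregate must be given:
#
#   >>> fx = Forecast(
#   ...     name='DA GHI', issue_time_of_day=datetime.time(7, 0),
#   ...     lead_time_to_start=pd.Timedelta('1h'),
#   ...     interval_length=pd.Timedelta('1h'),
#   ...     run_length=pd.Timedelta('24h'), interval_label='beginning',
#   ...     interval_value_type='interval_mean', variable='ghi',
#   ...     site=site)
#   >>> fx.units
#   'W/m^2'

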
@dataclass(frozen=True)
class EventForecast(Forecast):
    """
    Extends Forecast dataclass to include event forecast attributes.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : {'event'}
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : {'event'}
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the Forecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Forecast`
    """
    __blurb__: ClassVar[str] = 'Event Forecast'

    def __post_init__(self):
        if self.interval_label != "event":
            raise ValueError("Interval label must be 'event'")
        elif self.variable != "event":
            raise ValueError("Variable must be 'event'")
        super().__post_init__()


def __set_constant_value_units__(cls):
    if cls.axis == 'x':
        # e.g. Prob(obs < 10 MW). Forecast is in %, constant value
        # is 10 MW
        object.__setattr__(cls, 'constant_value_units', cls.units)
        object.__setattr__(cls, 'units', '%')
    else:
        # e.g. Prob(obs < fx) = 90%. Forecast is in units of the
        # observation, constant value is %
        object.__setattr__(cls, 'constant_value_units', '%')


@dataclass(frozen=True)
class _ProbabilisticForecastConstantValueBase:
    axis: str
    constant_value: float
    constant_value_units: str = field(init=False)


@dataclass(frozen=True)
class ProbabilisticForecastConstantValue(
        Forecast, _ProbabilisticForecastConstantValueBase):
    """
    Extends Forecast dataclass to include probabilistic forecast
    attributes.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    axis : str
        The axis on which the constant values of the CDF are specified.
        The axis can be either *x* (constant variable values) or *y*
        (constant percentiles).
    constant_value : float
        The variable value or percentile.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the ProbabilisticForecastConstantValue information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.ProbabilisticForecast`
    """
    __blurb__: ClassVar[str] = 'Probabilistic Forecast Constant Value'

    def __post_init__(self):
        super().__post_init__()
        __check_axis__(self.axis)
        __set_constant_value_units__(self)


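# A units sketch (other fields elided): with axis='y' the forecast
# keeps the variable's units and the constant value is a percentile;
# with axis='x' the forecast becomes a probability in % and the
# constant value carries the variable's units:
#
#   >>> cv = ProbabilisticForecastConstantValue(
#   ...     ..., variable='ghi', axis='y', constant_value=50.0)
#   >>> cv.units, cv.constant_value_units
#   ('W/m^2', '%')

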
@dataclass(frozen=True)
class _ProbabilisticForecastBase:
    axis: str
    constant_values: Tuple[Union[ProbabilisticForecastConstantValue, float, int], ...]  # NOQA
    constant_value_units: str = field(init=False)


@dataclass(frozen=True)
class ProbabilisticForecast(
        Forecast, _ProbabilisticForecastBase):
    """
    Tracks a group of ProbabilisticForecastConstantValue objects that
    together describe one or more points of the same probability
    distribution.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    axis : str
        The axis on which the constant values of the CDF are specified.
        The axis can be either *x* (constant variable values) or *y*
        (constant percentiles).
    constant_values : tuple of ProbabilisticForecastConstantValue or float
        The variable values or percentiles. Floats will automatically
        be converted to ProbabilisticForecastConstantValue objects.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the ProbabilisticForecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    ProbabilisticForecastConstantValue
    Forecast
    """
    __blurb__: ClassVar[str] = 'Probabilistic Forecast'

    def __post_init__(self):
        super().__post_init__()
        __check_axis__(self.axis)
        __set_constant_value_units__(self)
        __set_constant_values__(self)
        __check_axis_consistency__(self.axis, self.constant_values)


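# A promotion sketch (other fields elided): float constant_values are
# converted to ProbabilisticForecastConstantValue objects by
# __set_constant_values__ in __post_init__:
#
#   >>> pfx = ProbabilisticForecast(
#   ...     ..., axis='y', constant_values=(25.0, 50.0, 75.0))
#   >>> {type(cv).__name__ for cv in pfx.constant_values}
#   {'ProbabilisticForecastConstantValue'}

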
def __validate_cost__(index_var):
    def val(obj):
        if hasattr(obj, 'fill'):
            fillkeys = ALLOWED_COST_FILL_OPTIONS
            if obj.fill not in fillkeys:
                raise ValueError(
                    f"Cost 'fill' must be one of {str(fillkeys)}")
        if hasattr(obj, 'aggregation'):
            aggkeys = ALLOWED_COST_AGG_OPTIONS
            if obj.aggregation not in aggkeys:
                raise ValueError(
                    f"Cost 'aggregation' must be one of {str(aggkeys)}")
        if index_var is not None:
            if len(obj.cost) != len(getattr(obj, index_var)):
                raise ValueError(
                    f"'cost' and '{index_var}' must have the same length")
    return val


@dataclass(frozen=True)
class TimeOfDayCost(BaseModel):
    """Cost values based on the time of day.

    Parameters
    ----------
    times : tuple of datetime.time
        The times to associate with each cost value
    cost : tuple of float
        The cost per unit error of the forecasted variable for each
        time. Must have the same length as `times`.
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    fill : str
        Fill method to apply for times between those specified in
        `times`. Options are 'forward' or 'backward'.
    timezone : str, default None
        IANA timezone string to use when constructing datetimes. If
        None, the timezone of the observations is used, which is the
        report timezone when calculated in a report.
    """
    times: Tuple[datetime.time, ...]
    cost: Tuple[float, ...]
    aggregation: str
    net: bool
    fill: str
    timezone: str = None

    __post_init__ = __validate_cost__('times')


@dataclass(frozen=True)
class DatetimeCost(BaseModel):
    """Cost values based on datetimes.

    Parameters
    ----------
    datetimes : tuple/iterable of datetime-like objects
        The datetimes to associate with each cost value
    cost : tuple of float
        The cost per unit error of the forecasted variable for each
        datetime. Must have the same length as `datetimes`.
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    fill : str
        Fill method to apply for datetimes between those specified in
        `datetimes`. Options are 'forward' or 'backward'.
    timezone : str, default None
        IANA timezone string to use when constructing datetimes. If
        None, the timezone of the observations is used, which is the
        report timezone when calculated in a report.
    """
    datetimes: Tuple[pd.Timestamp, ...]
    cost: Tuple[float, ...]
    aggregation: str
    net: bool
    fill: str
    timezone: str = None

    __post_init__ = __validate_cost__('datetimes')


@dataclass(frozen=True)
class ConstantCost(BaseModel):
    """A constant cost per unit error of the forecasted variable

    Parameters
    ----------
    cost : float
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    """
    cost: float
    aggregation: str
    net: bool

    __post_init__ = __validate_cost__(None)


@dataclass(frozen=True)
class CostBand(BaseModel):
    """Cost specification for one error band

    Parameters
    ----------
    error_range : tuple(float, float)
        Bounds of the error to apply the specified cost function to.
        Inf and -Inf are valid range points, and the error may be
        positive or negative. Inclusion/exclusion of endpoints is
        determined by ordering in
        :py:class:`solarforecastarbiter.datamodel.ErrorBandCost`.
    cost_function : str
        One of 'timeofday', 'datetime', or 'constant'. Specifies which
        cost model should be used to calculate the cost in this band.
    cost_function_parameters : :py:class:`solarforecastarbiter.datamodel.ConstantCost` or :py:class:`solarforecastarbiter.datamodel.TimeOfDayCost` or :py:class:`solarforecastarbiter.datamodel.DatetimeCost`
        Parameters for the selected cost function.
    """  # NOQA: E501
    error_range: Tuple[float, float]
    cost_function: str
    cost_function_parameters: Union[TimeOfDayCost, DatetimeCost, ConstantCost]

    def _special_field_processing(self, model_field, val):
        # support passing "inf", "-inf" as strings via json/dict
        if model_field.name == 'error_range':
            return float(val)
        else:  # pragma: no cover
            return val

    def __post_init__(self):
        if self.cost_function == 'timeofday':
            if not isinstance(self.cost_function_parameters, TimeOfDayCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "TimeOfDayCost for 'timeofday' cost function.")
        elif self.cost_function == 'datetime':
            if not isinstance(self.cost_function_parameters, DatetimeCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "DatetimeCost for 'datetime' cost function.")
        elif self.cost_function == 'constant':
            if not isinstance(self.cost_function_parameters, ConstantCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "ConstantCost for 'constant' cost function.")
        else:
            raise ValueError(
                "'cost_function' must be one of 'timeofday', 'datetime', or"
                " 'constant'")


@dataclass(frozen=True)
class ErrorBandCost(BaseModel):
    """Cost that varies based on the error value. For each error band,
    one of the other cost functions is applied to the errors within the
    band. If an error value does not fall within any band ranges, no
    cost is calculated for that error.

    Parameters
    ----------
    bands : tuple of :py:class:`solarforecastarbiter.datamodel.CostBand`
        Specification of the error bands and associated cost functions.

    Notes
    -----
    Each error is restricted to a single band/cost function, so the
    ordering of `bands` determines priority. For example, if
    ``bands[0].error_range == (0, 2)`` and
    ``bands[1].error_range == (1, 3)``, the cost function of bands[0]
    is applied for all errors from [0, 2] and bands[1] is applied for
    errors from (2, 3].
    """
    bands: Tuple[CostBand, ...]


@dataclass(frozen=True)
class Cost(BaseModel):
    """Specify how cost metrics should be calculated.

    Parameters
    ----------
    name : str
        Identifier for these cost parameters
    type : str
        The type of cost parameters that are included in `parameters`.
        One of 'timeofday', 'datetime', 'constant', or 'errorband'.
    parameters : :py:class:`solarforecastarbiter.datamodel.ConstantCost` or :py:class:`solarforecastarbiter.datamodel.TimeOfDayCost` or :py:class:`solarforecastarbiter.datamodel.DatetimeCost` or :py:class:`solarforecastarbiter.datamodel.ErrorBandCost`
        Parameters for the specific cost function type.
    """  # NOQA: E501
    name: str
    type: str
    parameters: Union[TimeOfDayCost, DatetimeCost, ConstantCost,
                      ErrorBandCost]

    def __post_init__(self):
        if self.type not in ALLOWED_COST_FUNCTIONS:
            raise ValueError(
                f"'type' must be one of {ALLOWED_COST_FUNCTIONS}")

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        type_ = dict_['type']
        param_dict = dict_.get('parameters', {})
        if type_ == 'timeofday':
            dict_['parameters'] = TimeOfDayCost.from_dict(param_dict)
        elif type_ == 'datetime':
            dict_['parameters'] = DatetimeCost.from_dict(param_dict)
        elif type_ == 'constant':
            dict_['parameters'] = ConstantCost.from_dict(param_dict)
        elif type_ == 'errorband':
            dict_['parameters'] = ErrorBandCost.from_dict(param_dict)
        else:
            raise ValueError(
                f"'type' must be one of {ALLOWED_COST_FUNCTIONS}")
        return super().from_dict(dict_, raise_on_extra)


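# A from_dict sketch (hypothetical values): the 'type' key selects the
# parameter dataclass used to parse 'parameters':
#
#   >>> cost = Cost.from_dict({
#   ...     'name': 'flat', 'type': 'constant',
#   ...     'parameters': {'cost': 1.5, 'aggregation': 'sum',
#   ...                    'net': False}})
#   >>> type(cost.parameters).__name__
#   'ConstantCost'

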
def __set_constant_values__(self):
    out = []
    for cv in self.constant_values:
        if isinstance(cv, ProbabilisticForecastConstantValue):
            out.append(cv)
        elif isinstance(cv, (float, int)):
            cv_dict = self.to_dict()
            cv_dict.pop('forecast_id', None)
            cv_dict['constant_value'] = cv
            out.append(
                ProbabilisticForecastConstantValue.from_dict(cv_dict))
        else:
            raise TypeError(
                f'Invalid type for a constant value {cv}. Must be int, '
                'float, or ProbabilisticForecastConstantValue')
    object.__setattr__(self, 'constant_values', tuple(out))


def __check_axis__(axis):
    if axis not in ('x', 'y'):
        raise ValueError('Axis must be x or y')


def __check_axis_consistency__(axis, constant_values):
    if not all(arg.axis == axis for arg in constant_values):
        raise ValueError('All axis attributes must be identical')


def __check_units__(*args):
    if len(args) == 0:
        return
    unique_units = set()
    for arg in args:
        if getattr(arg, 'axis', None) == 'x':
            unique_units.add(arg.constant_value_units)
        else:
            unique_units.add(arg.units)
    if len(unique_units) > 1:
        raise ValueError('All units must be identical.')


def __check_interval_compatibility__(forecast, observation):
    if observation.interval_length > forecast.interval_length:
        raise ValueError('observation.interval_length cannot be greater than '
                         'forecast.interval_length.')
    if ('instant' in forecast.interval_label and
            'instant' not in observation.interval_label):
        raise ValueError('Instantaneous forecasts cannot be evaluated against '
                         'interval average observations.')


@dataclass(frozen=True)
class ForecastObservation(BaseModel):
    """
    Class for pairing Forecast and Observation objects for evaluation.

    Parameters
    ----------
    forecast : :py:class:`solarforecastarbiter.datamodel.Forecast`
    observation : :py:class:`solarforecastarbiter.datamodel.Observation`
    reference_forecast : :py:class:`solarforecastarbiter.datamodel.Forecast` or None
    normalization : float or None
        If None, determined by __set_normalization__
    uncertainty : None, float, or str
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%. If str, may be
        'observation_uncertainty' to indicate that the value should be
        set to ``observation.uncertainty``, or may be coerceable to a
        float.
    cost : str or None
        Cost parameters to use from the costs associated with
        ReportParameters
    """  # NOQA
    forecast: Forecast
    observation: Observation
    reference_forecast: Union[Forecast, None] = None
    # some function applied to observation (e.g. mean per day)
    # possible in future. maybe add pd.Series like for
    # ProcessedForecastObservation
    normalization: Union[float, None] = None
    uncertainty: Union[None, float, str] = None
    cost: Union[str, None] = None
    data_object: Observation = field(init=False)

    def __post_init__(self):
        __set_normalization__(self)
        __set_uncertainty__(self)
        object.__setattr__(self, 'data_object', self.observation)
        __check_units__(self.forecast, self.data_object)
        __check_interval_compatibility__(self.forecast, self.data_object)


def __set_normalization__(self):
    if self.normalization is None:
        if self.observation.variable == 'ac_power':
            norm = self.observation.site.modeling_parameters.ac_capacity
        elif self.observation.variable == 'dc_power':
            norm = self.observation.site.modeling_parameters.dc_capacity
        elif self.observation.units == 'W/m^2':
            # normalizing by 1000 W/m^2 was considered and rejected
            # https://github.com/SolarArbiter/solarforecastarbiter-core/pull/379#discussion_r402434134
            # keep W/m^2 as a separate item for likely future improvements
            norm = np.nan
        else:
            norm = np.nan
    else:
        # norm was supplied, but we're going to make sure it can be
        # coerced to a float
        norm = self.normalization
    norm = float(norm)  # from_dict only checks for floats, chokes on ints
    object.__setattr__(self, 'normalization', norm)


def __set_aggregate_normalization__(self):
    # https://github.com/SolarArbiter/solarforecastarbiter-core/issues/381
    norm = np.nan
    object.__setattr__(self, 'normalization', norm)


def __set_uncertainty__(self):
    if isinstance(self.uncertainty, str):
        try:
            unc = float(self.uncertainty)
        except ValueError:
            if self.uncertainty == 'observation_uncertainty':
                object.__setattr__(
                    self, 'uncertainty', self.observation.uncertainty)
            else:
                # easy to mistype 'observation_uncertainty', so be helpful
                raise ValueError(
                    ('Invalid uncertainty %s. uncertainty must be set to '
                     'None, a float, or "observation_uncertainty"')
                    % self.uncertainty)
        else:
            object.__setattr__(self, 'uncertainty', unc)


@dataclass(frozen=True)
class ForecastAggregate(BaseModel):
    """
    Class for pairing Forecast and Aggregate objects for evaluation.

    Parameters
    ----------
    forecast : :py:class:`solarforecastarbiter.datamodel.Forecast`
    aggregate : :py:class:`solarforecastarbiter.datamodel.Aggregate`
    reference_forecast : :py:class:`solarforecastarbiter.datamodel.Forecast` or None
    normalization : float or None
        If None, assigned 1.
    uncertainty : None, float, or str
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%. Strings must be
        coerceable to a float.
    cost : str or None
        Cost parameters to use from the costs associated with
        ReportParameters
    """  # NOQA
    forecast: Forecast
    aggregate: Aggregate
    reference_forecast: Union[Forecast, None] = None
    normalization: Union[float, None] = None
    uncertainty: Union[float, None] = None
    cost: Union[str, None] = None
    data_object: Aggregate = field(init=False)

    def __post_init__(self):
        if self.normalization is None:
            __set_aggregate_normalization__(self)
        if self.uncertainty is not None:
            object.__setattr__(self, 'uncertainty', float(self.uncertainty))
        object.__setattr__(self, 'data_object', self.aggregate)
        __check_units__(self.forecast, self.data_object)
        __check_interval_compatibility__(self.forecast, self.data_object)


@dataclass(frozen=True)
class BaseFilter(BaseModel):
    """
    Base class for filters to be applied in a report.
    """
    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != BaseFilter:
            return super().from_dict(dict_, raise_on_extra)
        if 'quality_flags' in dict_:
            return QualityFlagFilter.from_dict(dict_, raise_on_extra)
        elif 'time_of_day_range' in dict_:
            return TimeOfDayFilter.from_dict(dict_, raise_on_extra)
        elif 'value_range' in dict_:
            return ValueFilter.from_dict(dict_, raise_on_extra)
        else:
            raise NotImplementedError(
                'Do not know how to process dict into a Filter.')


@dataclass(frozen=True)
class QualityFlagFilter(BaseFilter):
    """Quality flag filters to be applied in a report.

    Parameters
    ----------
    quality_flags : Tuple of str
        Strings corresponding to ``BITMASK_DESCRIPTION_DICT`` or
        ``DERIVED_MASKS`` keys. These periods will be excluded from the
        analysis.
    discard_before_resample : bool, default True
        Determines if points should be discarded before resampling or
        only during resampling (when
        ``resample_threshold_percentage`` is exceeded).
    resample_threshold_percentage : float, default 10.
        The percentage of points in a resampled interval that must be
        flagged for the resampled interval to be flagged.

    Notes
    -----
    If ``discard_before_resample`` is ``True``, the ``quality_flags``
    are applied to the data before it is resampled. During resampling,
    intervals are discarded if ``resample_threshold_percentage`` is
    exceeded.

    If ``discard_before_resample`` is ``False``, the ``quality_flags``
    are only considered during the resampling operation. The
    ``quality_flags`` of the raw observations are combined with ``OR``,
    the total number of flagged points within a resample period is
    computed, and intervals are discarded where
    ``resample_threshold_percentage`` is exceeded.
    """
    quality_flags: Tuple[str, ...] = (
        'UNEVEN FREQUENCY', 'LIMITS EXCEEDED', 'CLEARSKY EXCEEDED',
        'DAYTIME STALE VALUES', 'INCONSISTENT IRRADIANCE COMPONENTS'
    )
    discard_before_resample: bool = True
    resample_threshold_percentage: float = 10.

    def __post_init__(self):
        if not all(flag in ALLOWED_QUALITY_FLAGS
                   for flag in self.quality_flags):
            raise ValueError('Quality flags must be in '
                             'BITMASK_DESCRIPTION_DICT or DERIVED_MASKS')


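# A validation sketch: flags outside ALLOWED_QUALITY_FLAGS are
# rejected in __post_init__:
#
#   >>> QualityFlagFilter(quality_flags=('NOT A REAL FLAG',))
#   ValueError: Quality flags must be in BITMASK_DESCRIPTION_DICT ...

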
@dataclass(frozen=True)
class TimeOfDayFilter(BaseFilter):
    """
    Class representing a time of day filter to be applied in a report.

    Parameters
    ----------
    time_of_day_range : (datetime.time, datetime.time) tuple
        Time of day range to calculate errors. Range is inclusive of
        both endpoints. Do not use this to exclude nighttime; instead
        set the corresponding quality_flag.
    """
    time_of_day_range: Tuple[datetime.time, datetime.time]
    # add these?
    # discard_before_resample: bool = True
    # resample_threshold_percentage: float = 10.


@dataclass(frozen=True)
class ValueFilter(BaseFilter):
    """
    Class representing an observation or forecast value filter to be
    applied in a report.

    Parameters
    ----------
    metadata : :py:class:`solarforecastarbiter.datamodel.Forecast` or :py:class:`solarforecastarbiter.datamodel.Observation`
        Object to get values for.
    value_range : (float, float) tuple
        Value range to calculate errors. Range is inclusive of both
        endpoints. Filters are applied before resampling.
    """  # NOQA
    # TODO: implement. Also add Aggregate
    metadata: Union[Observation, Forecast]
    value_range: Tuple[float, float]
    # add these?
    # discard_before_resample: bool = True
    # resample_threshold_percentage: float = 10.


def __check_metrics__(fx, metrics):
    """Validate metrics selection.

    Check that the selected metrics are valid for the given scenario
    (e.g. if deterministic forecasts, then deterministic metrics).

    Parameters
    ----------
    fx : Forecast, ProbabilisticForecast, ProbabilisticForecastConstantValue
        Forecast to be evaluated by metrics.
    metrics : Tuple of str
        Metrics to be computed in the report.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the selected metrics are not valid for the given forecast
        type.
    """
    if isinstance(fx, (ProbabilisticForecast,
                       ProbabilisticForecastConstantValue)):
        if not set(metrics) <= ALLOWED_PROBABILISTIC_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_PROBABILISTIC_METRICS.")
    elif isinstance(fx, EventForecast):
        if not set(metrics) <= ALLOWED_EVENT_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_EVENT_METRICS.")
    elif isinstance(fx, Forecast):
        if not set(metrics) <= ALLOWED_DETERMINISTIC_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_DETERMINISTIC_METRICS.")


def __check_categories__(categories):
    if not set(categories) <= ALLOWED_CATEGORIES.keys():
        raise ValueError('Categories must be in ALLOWED_CATEGORIES')


@dataclass(frozen=True)
class ValidationResult(BaseModel):
    """Store the validation result for a flag or combination of flags.

    Parameters
    ----------
    flag : str
        The quality flag(s) being recorded. See
        :py:mod:`solarforecastarbiter.validation.quality_mapping`.
    count : int
        The number of timestamps that were flagged.
    before_resample : bool
        If the flag was applied before resampling.
    """
    flag: str
    count: int
    before_resample: bool = True


@dataclass(frozen=True)
class PreprocessingResult(BaseModel):
    """Stores summary information to record preprocessing results that
    detail how data has been handled.

    Parameters
    ----------
    name : str
        The human readable name noting the process and data applied.
    count : int
        The number of timestamps that were managed in the process.
    """
    name: str
    count: int


# need to apply filtering + resampling to each forecast obs pair
@dataclass(frozen=True)
class ProcessedForecastObservation(BaseModel):
    """
    Hold the processed forecast and observation data with the
    resampling parameters.

    Parameters
    ----------
    name : str
    original : :py:class:`solarforecastarbiter.datamodel.ForecastObservation` or :py:class:`solarforecastarbiter.datamodel.ForecastAggregate`
    interval_value_type : str
    interval_length : pandas.Timedelta
    interval_label : str
    valid_point_count : int
        The number of valid points in the processed forecast.
    forecast_values : pandas.Series or str or None
        The values of the forecast, the forecast id, or None.
    observation_values : pandas.Series or str or None
        The values of the observation, the observation or aggregate id,
        or None.
    reference_forecast_values : pandas.Series or str or None
        The values of the reference forecast, the reference forecast
        id, or None.
    validation_results : tuple of :py:class:`solarforecastarbiter.datamodel.ValidationResult`
    preprocessing_results : tuple of :py:class:`solarforecastarbiter.datamodel.PreprocessingResult`
    normalization_factor : pandas.Series or float
    uncertainty : None or float
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%.
    cost : :py:class:`solarforecastarbiter.datamodel.Cost` or None
        The parameters to use when calculating cost metrics.
    """  # NOQA
    name: str
    # do this instead of subclass to compare objects later
    original: Union[ForecastObservation, ForecastAggregate]
    interval_value_type: str
    interval_length: pd.Timedelta
    interval_label: str
    valid_point_count: int
    forecast_values: Union[pd.Series, str, None]
    observation_values: Union[pd.Series, str, None]
    reference_forecast_values: Union[pd.Series, str, None] = None
    validation_results: Tuple[ValidationResult, ...] = ()
    preprocessing_results: Tuple[PreprocessingResult, ...] = ()
    # This may need to be a series, e.g. normalize by the average
    # observed value per day. Hence, repeat here instead of
    # only in original
    normalization_factor: Union[pd.Series, float] = 1.0
    uncertainty: Union[None, float] = None
    cost: Union[Cost, None] = None


@dataclass(frozen=True)
class MetricValue(BaseModel):
    """Class for storing the result of a single metric calculation.

    Parameters
    ----------
    category : str
        The category of the metric value, e.g. total, monthly, hourly.
    metric : str
        The metric that was calculated.
    index : str
        The index of the metric value, e.g. '1'-'12' for monthly
        metrics or '0'-'23' for hourly.
    value : float
        The value calculated for the metric.
    """
    category: str
    metric: str
    index: str
    value: float


@dataclass(frozen=True)
class MetricResult(BaseModel):
    """Class for storing the results of many metric calculations for a
    single observation and forecast pair.

    Parameters
    ----------
    name : str
        A descriptive name for the MetricResult.
    forecast_id : str
        UUID of the forecast being analyzed.
    values : tuple of :py:class:`solarforecastarbiter.datamodel.MetricValue`
        The computed metric values.
    observation_id : str or None
        UUID of the observation being analyzed.
    aggregate_id : str or None
        UUID of the aggregate being analyzed.
    is_summary : bool
        If this metric result represents summary statistics of the
        observation and forecast timeseries.

    Notes
    -----
    Only one of `aggregate_id` or `observation_id` may be set.

    Raises
    ------
    ValueError
        When both `aggregate_id` and `observation_id` are not None, or
        when both are None.
    """
    name: str
    forecast_id: str
    values: Tuple[MetricValue, ...]
    observation_id: Union[str, None] = None
    aggregate_id: Union[str, None] = None
    is_summary: bool = False

    def __post_init__(self):
        if (
                (self.observation_id is None and self.aggregate_id is None)
                or (
                    self.observation_id is not None and
                    self.aggregate_id is not None
                )
        ):
            raise ValueError(
                'One of observation_id OR aggregate_id must be set')


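# A validation sketch (hypothetical IDs): exactly one of
# observation_id or aggregate_id must be set:
#
#   >>> MetricResult(name='pair 1', forecast_id='fx-uuid', values=(),
#   ...              observation_id=None, aggregate_id=None)
#   ValueError: One of observation_id OR aggregate_id must be set

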
def __check_plot_spec__(plot_spec):
    """Ensure that the provided plot specification is a valid JSON
    object"""
    try:
        spec_dict = json.loads(plot_spec)
        validate(instance=spec_dict, schema={'type': 'object'})
    except (json.JSONDecodeError, ValidationError):
        raise ValueError('Figure spec must be a valid json object.')


@dataclass(frozen=True)
class ReportFigure(BaseModel):
    """Parent class for different types of Report Figures"""
    def __post_init__(self):
        if type(self) == ReportFigure:
            raise ValueError("Invalid Report Figure. Figures must be of "
                             "class PlotlyReportFigure or "
                             "BokehReportFigure.")

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != ReportFigure:
            return super().from_dict(dict_, raise_on_extra)
        figure_class = dict_.get('figure_class')
        if figure_class == 'plotly':
            return PlotlyReportFigure.from_dict(dict_, raise_on_extra)
        elif figure_class == 'bokeh':
            return BokehReportFigure.from_dict(dict_, raise_on_extra)
        else:
            raise NotImplementedError(
                'Do not know how to process dict into a ReportFigure.')


@dataclass(frozen=True)
class PlotlyReportFigure(ReportFigure):
    """A class for storing metric plots for a report with associated
    metadata.

    Parameters
    ----------
    name : str
        A descriptive name for the figure.
    spec : str
        JSON string representation of the plotly plot.
    figure_type : str
        The type of plot, e.g. bar or scatter.
    pdf : str
        A static PDF copy of the plot, for including in PDF reports.
    svg : str
        Deprecated in favor of pdf. A static svg copy of the plot.
    category : str
        The metric category. One of ALLOWED_CATEGORIES keys.
    metric : str
        The metric being plotted.
    """
    name: str
    spec: str
    figure_type: str
    pdf: str = ''
    svg: str = ''
    category: str = ''
    metric: str = ''
    figure_class: str = 'plotly'

    def __post_init__(self):
        __check_plot_spec__(self.spec)


@dataclass(frozen=True)
class BokehReportFigure(ReportFigure):
    """A class for storing metric plots for a report with associated
    metadata.

    Parameters
    ----------
    name : str
        A descriptive name for the figure.
    div : str
        An HTML div element to be the target of the Bokeh javascript.
    svg : str
        A static svg copy of the plot, for including in the pdf
        version.
    figure_type : str
        The type of plot, e.g. bar or scatter.
    category : str
        The metric category. One of ALLOWED_CATEGORIES keys.
    metric : str
        The metric being plotted.
    """
    name: str
    div: str
    svg: str
    figure_type: str
    category: str = ''
    metric: str = ''
    figure_class: str = 'bokeh'


def __bokeh_or_plotly__(cls):
    if cls.bokeh_version is not None and cls.plotly_version is not None:
        raise KeyError('Only provide one of "bokeh_version" or '
                       '"plotly_version" to RawReportPlots')
    elif cls.bokeh_version is None and cls.plotly_version is None:
        raise KeyError('Must provide one of "bokeh_version" or '
                       '"plotly_version" to RawReportPlots')


@dataclass(frozen=True)
class RawReportPlots(BaseModel):
    """Class for storing collection of all metric plots on a raw
    report.

    Parameters
    ----------
    figures : tuple of :py:class:`solarforecastarbiter.datamodel.ReportFigure`
    plotly_version : str or None
        The plotly version used when generating metrics plots.
    bokeh_version : str or None
        The bokeh version used when generating metrics plots, if any.
    script : str or None
        Javascript for rendering Bokeh figures; required when
        bokeh_version is set.
    """  # NOQA
    figures: Tuple[ReportFigure, ...]
    plotly_version: Union[str, None] = None
    bokeh_version: Union[str, None] = None
    script: Union[str, None] = None

    def __post_init__(self):
        __bokeh_or_plotly__(self)
        if self.bokeh_version is not None:
            if self.script is None:
                raise KeyError('Must provide script for Bokeh plots to '
                               'RawReportPlots')


@dataclass(frozen=True)
class ReportMessage(BaseModel):
    """Class for intercepting errors and warnings associated with
    report processing.

    Parameters
    ----------
    message : str
    step : str
    level : str
    function : str
        The function where the error originated.
    """
    message: str
    step: str
    level: str
    function: str


@dataclass(frozen=True)
class TimePeriod(BaseModel):
    """Class for storing a generic time period. For example, a report
    outage.

    Parameters
    ----------
    start : pandas.Timestamp
        Start time of the time period.
    end : pandas.Timestamp
        End time of the time period.
    """
    start: pd.Timestamp
    end: pd.Timestamp


@dataclass(frozen=True)
class RawReport(BaseModel):
    """Class for holding the result of processing a report request
    including some metadata, the calculated metrics, plots, the
    processed forecast/observation data, and messages from report
    generation. This is called a "raw" report because this object,
    along with the report parameters, can be processed into a HTML or
    PDF report.

    Parameters
    ----------
    generated_at : pandas.Timestamp
        The time at report computation.
    timezone : str
        The IANA timezone of the report.
    versions : tuple of (str, str) tuples
        Pairs of package name and version to ensure the correct version
        of the core library is used when rendering or recomputing the
        report.
    plots : :py:class:`solarforecastarbiter.datamodel.RawReportPlots`
    metrics : tuple of :py:class:`solarforecastarbiter.datamodel.MetricResult`
    processed_forecasts_observations : tuple of :py:class:`solarforecastarbiter.datamodel.ProcessedForecastObservation`
    messages : tuple of :py:class:`solarforecastarbiter.datamodel.ReportMessage`
    data_checksum : str or None
        SHA-256 checksum of the raw data used in the report.
    outages : Tuple[TimePeriod, ...], optional
        List of report outage periods used when this raw report was
        generated.
    """  # NOQA
    generated_at: pd.Timestamp
    timezone: str
    versions: Tuple[Tuple[str, str], ...]
    plots: RawReportPlots
    metrics: Tuple[MetricResult, ...]
    processed_forecasts_observations: Tuple[ProcessedForecastObservation, ...]
    messages: Tuple[ReportMessage, ...] = ()
    data_checksum: Union[str, None] = None
    outages: Tuple[TimePeriod, ...] = ()


def __check_cost_consistency__(object_pairs, available_costs):
    cost_names = [ac.name for ac in available_costs]
    for op in object_pairs:
        if op.cost is not None and op.cost not in cost_names:
            raise ValueError(
                f'Object pair cost, {op.cost}, not present in cost '
                'parameters specified here')


@dataclass(frozen=True)
class ReportParameters(BaseModel):
    """Parameters required to define and generate a Report.

    Parameters
    ----------
    name : str
        Name of the report.
    start : pandas.Timestamp
        Start time of the reporting period.
    end : pandas.Timestamp
        End time of the reporting period.
    forecast_fill_method : {'drop', 'forward', float}
        Indicates what process to use for handling missing forecasts.
    object_pairs : Tuple of ForecastObservation or ForecastAggregate
        Paired Forecasts and Observations or Aggregates to be analyzed
        in the report.
    metrics : Tuple of str
        Metrics to be computed in the report.
    categories : Tuple of str
        Categories to compute and organize metrics over in the report.
    filters : Tuple of Filters
        Filters to be applied to the data in the report.
    costs : Tuple of Costs
        Cost parameters that can be referenced in `object_pairs` to
        compute cost metrics for that pair. Each object pair must have
        the 'cost' parameter set to None (no cost calculation will be
        performed) or one of the names of these costs.
    timezone : str or None
        The timezone in which to compute daily, hourly, etc.
        statistics. If None, inferred from data in object_pairs.
    """
    name: str
    start: pd.Timestamp
    end: pd.Timestamp
    object_pairs: Tuple[Union[ForecastObservation, ForecastAggregate], ...]
    metrics: Tuple[str, ...] = ('mae', 'mbe', 'rmse')
    categories: Tuple[str, ...] = ('total', 'date', 'hour')
    forecast_fill_method: str = 'drop'
    filters: Tuple[BaseFilter, ...] = field(
        default_factory=lambda: (QualityFlagFilter(), ))
    costs: Tuple[Cost, ...] = tuple()
    timezone: Optional[str] = None

    def __post_init__(self):
        # ensure that all forecast and observation units are the same
        __check_units__(*itertools.chain.from_iterable(
            (k.forecast, k.data_object) for k in self.object_pairs))
        # ensure the metrics can be applied to the forecasts and
        # observations
        for k in self.object_pairs:
            __check_metrics__(k.forecast, self.metrics)
        # ensure that categories are valid
        __check_categories__(self.categories)
        __check_cost_consistency__(self.object_pairs, self.costs)


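# A minimal sketch (`pair` is a hypothetical ForecastObservation):
# defaults supply the metrics, categories, and filters, while
# __post_init__ validates units, metrics, categories, and cost names:
#
#   >>> params = ReportParameters(
#   ...     name='Example report',
#   ...     start=pd.Timestamp('20200101T0000Z'),
#   ...     end=pd.Timestamp('20200201T0000Z'),
#   ...     object_pairs=(pair,))
#   >>> params.metrics
#   ('mae', 'mbe', 'rmse')

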
@dataclass(frozen=True)
class Report(BaseModel):
    """Class for keeping track of report metadata and the raw report
    that can later be rendered to HTML or PDF. Functions in
    :py:mod:`~solarforecastarbiter.reports.main` take a Report object
    with `raw_report` set to None, generate the report, and return
    another Report object with `raw_report` set to a RawReport object
    that can be rendered.

    Parameters
    ----------
    report_parameters : ReportParameters
        Metadata required to specify and generate the report.
    raw_report : RawReport or None
        Once computed, the raw report should be stored here.
    status : str
        Status of the report.
    report_id : str
        ID of the report in the API.
    provider : str, optional
        Provider of the Report information.
    outages : Tuple[TimePeriod, ...], optional
        List of report outage periods.
    __version__ : int
        Should be used to version reports to ensure even older reports
        can be properly rendered.
    """
    report_parameters: ReportParameters
    raw_report: Union[None, RawReport] = None
    status: str = 'pending'
    report_id: str = ''
    provider: str = ''
    outages: Tuple[TimePeriod, ...] = ()
    __version__: int = 0  # should add version to api


FORECAST_TYPE_MAPPING = {
    'forecast': Forecast,
    'event_forecast': EventForecast,
    'probabilistic_forecast': ProbabilisticForecast,
    'probabilistic_forecast_constant_value':
        ProbabilisticForecastConstantValue
}