"""
Data classes and acceptable variables as defined by the SolarForecastArbiter
Data Model document. Python 3.7 is required.
"""
from dataclasses import (dataclass, field, fields, MISSING, asdict,
                         replace, is_dataclass)
import datetime
import itertools
import json
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from typing import Optional, Tuple, Union, ClassVar


import numpy as np
import pandas as pd


from solarforecastarbiter.metrics.deterministic import \
    _MAP as deterministic_mapping
from solarforecastarbiter.metrics.deterministic import \
    _FILL_OPTIONS, _COST_FUNCTION_MAP, _AGG_OPTIONS
from solarforecastarbiter.metrics.event import _MAP as event_mapping
from solarforecastarbiter.metrics.probabilistic import \
    _MAP as probabilistic_mapping
from solarforecastarbiter.metrics.summary import (
    _DETERMINISTIC_MAP as summary_deterministic_mapping,
    _EVENT_MAP as summary_event_mapping,
)
from solarforecastarbiter.validation.quality_mapping import \
    DESCRIPTION_MASK_MAPPING, DERIVED_MASKS


DASH_URL = 'https://dashboard.solarforecastarbiter.org'
ALLOWED_VARIABLES = {
    'air_temperature': 'degC',
    'wind_speed': 'm/s',
    'ghi': 'W/m^2',
    'dni': 'W/m^2',
    'dhi': 'W/m^2',
    'poa_global': 'W/m^2',
    'relative_humidity': '%',
    'ac_power': 'MW',
    'dc_power': 'MW',
    'availability': '%',
    'curtailment': 'MW',
    'event': 'boolean',
    'load': 'MW',
    'net_load': 'MW'
}


COMMON_NAMES = {
    'air_temperature': 'Air Temperature',
    'wind_speed': 'Wind Speed',
    'ghi': 'GHI',
    'dni': 'DNI',
    'dhi': 'DHI',
    'poa_global': 'Plane of Array Irradiance',
    'relative_humidity': 'Relative Humidity',
    'ac_power': 'AC Power',
    'dc_power': 'DC Power',
    'availability': 'Availability',
    'curtailment': 'Curtailment',
    'event': 'Event',
    'load': 'Load',
    'net_load': 'Net Load',
}


CLOSED_MAPPING = {
    'event': None,
    'instant': None,
    'beginning': 'left',
    'ending': 'right'
}
ALLOWED_INTERVAL_LABELS = tuple(CLOSED_MAPPING.keys())
ALLOWED_INTERVAL_VALUE_TYPES = (
    'interval_mean', 'interval_max', 'interval_min', 'interval_median',
    'instantaneous')
ALLOWED_AGGREGATE_TYPES = ('sum', 'mean', 'median', 'max', 'min', 'std')


# Keys are the categories passed to pandas groupby, values are the human
# readable versions for plotting and forms.
ALLOWED_CATEGORIES = {
    'total': 'Total',
    'year': 'Year',
    'season': 'Season',
    'month': 'Month of the year',
    'hour': 'Hour of the day',
    'date': 'Date',
    'weekday': 'Day of the week'
}


# sentences/paragraphs that will appear in the report
# under the heading in the key
CATEGORY_BLURBS = {
    'total': "Metric totals for the entire selected period.",
    'year': "Metrics per year.",
    'season': "Metrics per season.",
    'month': "Metrics per month.",
    'hour': "Metrics per hour of the day.",
    'date': "Metrics per individual date.",
    'weekday': "Metrics per day of the week."
}


ALLOWED_DETERMINISTIC_METRICS = {
    k: v[1] for k, v in deterministic_mapping.items()}

ALLOWED_EVENT_METRICS = {k: v[1] for k, v in event_mapping.items()}

ALLOWED_PROBABILISTIC_METRICS = {
    k: v[1] for k, v in probabilistic_mapping.items()}

ALLOWED_METRICS = ALLOWED_DETERMINISTIC_METRICS.copy()
ALLOWED_METRICS.update(ALLOWED_PROBABILISTIC_METRICS)
ALLOWED_METRICS.update(ALLOWED_EVENT_METRICS)

ALLOWED_DETERMINISTIC_SUMMARY_STATISTICS = {
    k: v[1] for k, v in summary_deterministic_mapping.items()
}
ALLOWED_EVENT_SUMMARY_STATISTICS = {
    k: v[1] for k, v in summary_event_mapping.items()
}
ALLOWED_SUMMARY_STATISTICS = ALLOWED_DETERMINISTIC_SUMMARY_STATISTICS.copy()
ALLOWED_SUMMARY_STATISTICS.update(ALLOWED_EVENT_SUMMARY_STATISTICS)


ALLOWED_COST_FUNCTIONS = tuple(_COST_FUNCTION_MAP.keys())
ALLOWED_COST_AGG_OPTIONS = tuple(_AGG_OPTIONS.keys())
ALLOWED_COST_FILL_OPTIONS = tuple(_FILL_OPTIONS.keys())

ALLOWED_QUALITY_FLAGS = tuple([
    k for k in DESCRIPTION_MASK_MAPPING.keys()
    if not k.startswith('VERSION IDENTIFIER') and
    not k.startswith("RESERVED")
]) + tuple(DERIVED_MASKS.keys())


def _time_conv(inp):
    if isinstance(inp, datetime.time):
        return inp.strftime('%H:%M')
    elif isinstance(inp, datetime.datetime):
        return inp.isoformat()
    elif isinstance(inp, pd.Timedelta):
        # convert to integer minutes
        return inp.total_seconds() // 60
    else:
        return inp


def _dict_factory(inp):
    dict_ = {}
    for k, v in dict(inp).items():
        if isinstance(v, tuple):
            dict_[k] = tuple(_time_conv(i) for i in v)
        elif isinstance(v, list):  # pragma: no cover
            dict_[k] = [_time_conv(i) for i in v]
        else:
            dict_[k] = _time_conv(v)
    if 'units' in dict_:
        del dict_['units']
    if 'constant_value_units' in dict_:
        del dict_['constant_value_units']
    if 'data_object' in dict_:
        del dict_['data_object']
    return dict_


def _single_field_processing(model, field, val, field_type=None):
    type_ = field_type or field.type
    if (
            # If the value is already the right type, return
            # typing type_s do not work with isinstance, so check __origin__
            not hasattr(type_, '__origin__') and
            isinstance(val, type_)
    ):
        return val
    elif type_ == pd.Timedelta:
        return pd.Timedelta(f'{val}min')
    elif type_ == pd.Timestamp:
        out = pd.Timestamp(val)
        if pd.isna(out):
            raise ValueError(f'{val} is not a time')
        return out
    elif type_ == datetime.time:
        return datetime.datetime.strptime(val, '%H:%M').time()
    elif (
            is_dataclass(type_) and
            isinstance(val, dict)
    ):
        return type_.from_dict(val)
    elif (
            hasattr(type_, '__origin__') and
            type_.__origin__ is Union
    ):
        # with a Union, we must return the right type
        for ntype in type_.__args__:
            try:
                processed_val = _single_field_processing(
                    model, field, val, ntype
                )
            except (TypeError, ValueError, KeyError):
                continue
            else:
                if not isinstance(processed_val, ntype):
                    continue
                else:
                    return processed_val
        raise TypeError(f'Unable to process {val} as one of {type_.__args__}')
    else:
        return model._special_field_processing(
            model, field, val)


class BaseModel:
    def _special_field_processing(self, model_field, val):
        return val

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        """
        Construct a dataclass from the given dict, matching keys with
        the class fields. A KeyError is raised for any missing values.
        If raise_on_extra is True, an error is raised if keys of the
        dict are also not fields of the dataclass. For pandas.Timedelta
        model fields, it is assumed input_dict contains a number
        representing minutes. For datetime.time model fields,
        input_dict values are assumed to be strings in the %H:%M format.
        If a modeling_parameters field is present, the
        modeling_parameters key from input_dict is automatically parsed
        into the appropriate PVModelingParameters subclass based on
        tracking_type.

        Parameters
        ----------
        input_dict : dict
            The dict to process into dataclass fields
        raise_on_extra : boolean, default False
            If True, raise an exception on extra keys in input_dict
            that are not dataclass fields.

        Returns
        -------
        model : subclass of BaseModel
            Instance of the desired model.

        Raises
        ------
        KeyError
            For missing required fields or if raise_on_extra is True
            and input_dict contains extra keys.
        ValueError
            If a pandas.Timedelta, pandas.Timestamp, datetime.time, or
            modeling_parameters field cannot be parsed from the
            input_dict
        TypeError
            If the field has a Union type and the input parameter is
            not processed into one of the Union arguments
        """
        dict_ = input_dict.copy()
        model_fields = fields(model)
        kwargs = {}
        errors = []
        for model_field in model_fields:
            if model_field.name in dict_:
                field_val = dict_[model_field.name]
                if (
                        hasattr(model_field.type, '__origin__') and
                        model_field.type.__origin__ is tuple
                ):
                    out = []
                    default_type = model_field.type.__args__[0]
                    for i, arg in enumerate(field_val):
                        if (
                                i < len(model_field.type.__args__) and
                                model_field.type.__args__[i] is not Ellipsis
                        ):
                            this_type = model_field.type.__args__[i]
                        else:
                            this_type = default_type
                        out.append(
                            _single_field_processing(
                                model, model_field, arg, this_type))
                    kwargs[model_field.name] = tuple(out)
                else:
                    kwargs[model_field.name] = _single_field_processing(
                        model, model_field, field_val)
            elif (
                    model_field.default is MISSING and
                    model_field.default_factory is MISSING and
                    model_field.init
            ):
                errors.append(model_field.name)
        if errors:
            raise KeyError(
                'Missing the following required arguments for the model '
                f'{str(model)}: {", ".join(errors)}')
        names = [f.name for f in model_fields]
        extra = [k for k in dict_.keys() if k not in names]
        if extra and raise_on_extra:
            raise KeyError(
                f'Extra keys for the model {str(model)}: {", ".join(extra)}')
        return model(**kwargs)
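    # A usage sketch (hypothetical values): keys map to fields, and
    # missing required fields raise a KeyError:
    #
    #   >>> Site.from_dict({'name': 'Desert Rock', 'latitude': 36.62373,
    #   ...                 'longitude': -116.01947, 'elevation': 1007.0,
    #   ...                 'timezone': 'Etc/GMT+8'})
    #   Site(name='Desert Rock', ...)
    #   >>> Site.from_dict({'name': 'incomplete'})  # raises KeyError
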
    def to_dict(self):
        """
        Convert the dataclass into a dictionary suitable for uploading
        to the API. This means some types (such as pandas.Timedelta and
        times) are converted to strings.
        """
        # using the dict_factory recurses through all objects for
        # special conversions
        dict_ = asdict(self, dict_factory=_dict_factory)
        return dict_
    def replace(self, **kwargs):
        """
        Convenience wrapper for :py:func:`dataclasses.replace` to
        create a new dataclass from the old with the given keys
        replaced.
        """
        return replace(self, **kwargs)


@dataclass(frozen=True)
class Site(BaseModel):
    """
    Class for keeping track of Site metadata.

    Parameters
    ----------
    name : str
        Name of the Site, e.g. Desert Rock
    latitude : float
        Latitude of the Site in decimal degrees north of the equator,
        e.g. 36.62373
    longitude : float
        Longitude of the Site in decimal degrees east of the prime
        meridian, e.g. -116.01947
    elevation : float
        Elevation of the Site in meters above mean sea level, e.g. 1007
    timezone : str
        IANA timezone of the Site, e.g. Etc/GMT+8
    site_id : str, optional
        UUID of the Site in the API
    provider : str, optional
        Provider of the Site information.
    extra_parameters : str, optional
        The extra parameters may be used by forecasters when
        implementing other PV models. The framework does not provide a
        standard set of extra parameters or require a particular
        format; these are up to the site owner.
    climate_zones : tuple of str, optional
        The climate zones that the site is within
    """
    name: str
    latitude: float
    longitude: float
    elevation: float
    timezone: str
    site_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    climate_zones: Tuple[str, ...] = ()

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if 'modeling_parameters' in dict_:
            mp_dict = dict_.get('modeling_parameters', {})
            if not isinstance(mp_dict, PVModelingParameters):
                tracking_type = mp_dict.get('tracking_type', None)
                if tracking_type == 'fixed':
                    dict_['modeling_parameters'] = (
                        FixedTiltModelingParameters.from_dict(
                            mp_dict))
                    return SolarPowerPlant.from_dict(dict_, raise_on_extra)
                elif tracking_type == 'single_axis':
                    dict_['modeling_parameters'] = (
                        SingleAxisModelingParameters.from_dict(
                            mp_dict))
                    return SolarPowerPlant.from_dict(dict_, raise_on_extra)
                elif tracking_type is not None:
                    raise ValueError(
                        'tracking_type must be None, fixed, or '
                        'single_axis')
        return super().from_dict(dict_, raise_on_extra)


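# A dispatch sketch (hypothetical values): a dict whose
# modeling_parameters includes a tracking_type is parsed into a
# SolarPowerPlant with the matching PVModelingParameters subclass:
#
#   >>> plant = Site.from_dict({
#   ...     'name': 'Plant X', 'latitude': 32.2, 'longitude': -110.9,
#   ...     'elevation': 700.0, 'timezone': 'America/Phoenix',
#   ...     'modeling_parameters': {
#   ...         'tracking_type': 'fixed', 'surface_tilt': 30.0,
#   ...         'surface_azimuth': 180.0, 'ac_capacity': 10.0,
#   ...         'dc_capacity': 12.0, 'temperature_coefficient': -0.4,
#   ...         'dc_loss_factor': 0.0, 'ac_loss_factor': 0.0}})
#   >>> type(plant).__name__
#   'SolarPowerPlant'

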
@dataclass(frozen=True)
class PVModelingParameters(BaseModel):
    """
    Class for keeping track of generic PV modeling parameters

    Parameters
    ----------
    ac_capacity : float
        Nameplate AC power rating in megawatts
    dc_capacity : float
        Nameplate DC power rating in megawatts
    temperature_coefficient : float
        The temperature coefficient of DC power in units of %/C.
        Typically -0.2 to -0.5 % per degree C.
    dc_loss_factor : float
        Applied to DC current in units of %. 0 = no loss.
    ac_loss_factor : float
        Applied to inverter power output in units of %. 0 = no loss.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.FixedTiltModelingParameters`
    :py:class:`solarforecastarbiter.datamodel.SingleAxisModelingParameters`
    """
    ac_capacity: float
    dc_capacity: float
    temperature_coefficient: float
    dc_loss_factor: float
    ac_loss_factor: float


@dataclass(frozen=True)
class FixedTiltModelingParameters(PVModelingParameters):
    """
    A class based on PVModelingParameters that has additional
    parameters for fixed tilt PV systems.

    Parameters
    ----------
    surface_tilt : float
        Tilt from horizontal of a fixed tilt system, degrees
    surface_azimuth : float
        Azimuth angle of a fixed tilt system, degrees East of North

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.PVModelingParameters`
    """
    surface_tilt: float
    surface_azimuth: float
    tracking_type: str = 'fixed'


@dataclass(frozen=True)
class SingleAxisModelingParameters(PVModelingParameters):
    """
    A class based on PVModelingParameters that has additional
    parameters for single axis tracking systems.

    Parameters
    ----------
    axis_tilt : float
        Tilt from horizontal of the tracker axis, degrees
    axis_azimuth : float
        Azimuth angle of the tracker axis, degrees East of North
    ground_coverage_ratio : float
        Ratio of total width of modules on a tracker to the distance
        between tracker axes. For example, for trackers each with two
        modules of 1m width each, and a spacing between tracker axes of
        7m, the ground coverage ratio is 0.286 (= 2/7).
    backtrack : bool
        Indicates whether the tracking system uses backtracking
    max_rotation_angle : float
        Maximum rotation from horizontal of a single axis tracker,
        degrees

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.PVModelingParameters`
    """
    axis_tilt: float
    axis_azimuth: float
    ground_coverage_ratio: float
    backtrack: bool
    max_rotation_angle: float
    tracking_type: str = 'single_axis'


@dataclass(frozen=True)
class SolarPowerPlant(Site):
    """
    Class for keeping track of metadata associated with solar power
    plant Sites. Adds additional parameters to the Site dataclass.

    Parameters
    ----------
    modeling_parameters : PVModelingParameters
        Modeling parameters for the plant, e.g. a
        FixedTiltModelingParameters or SingleAxisModelingParameters
        instance.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    :py:class:`solarforecastarbiter.datamodel.SingleAxisModelingParameters`
    :py:class:`solarforecastarbiter.datamodel.FixedTiltModelingParameters`
    """
    modeling_parameters: PVModelingParameters = field(
        default_factory=PVModelingParameters)


def __set_units__(cls):
    if cls.variable not in ALLOWED_VARIABLES:
        raise ValueError('variable %s is not allowed' % cls.variable)
    object.__setattr__(cls, 'units', ALLOWED_VARIABLES[cls.variable])


def __generic_oneof__(cls, field, allowed):
    if getattr(cls, field) not in allowed:
        raise ValueError(f'{field} must be one of {allowed}')


def __check_interval_params__(cls):
    __generic_oneof__(
        cls, 'interval_label', ALLOWED_INTERVAL_LABELS)
    __generic_oneof__(
        cls, 'interval_value_type', ALLOWED_INTERVAL_VALUE_TYPES)


@dataclass(frozen=True)
class Observation(BaseModel):
    """
    A class for keeping track of metadata associated with an
    observation. Units are set according to the variable type.

    Parameters
    ----------
    name : str
        Name of the Observation
    variable : str
        Variable name, e.g. power, GHI. Each allowed variable has an
        associated pre-defined unit.
    interval_value_type : str
        The type of the data in the observation. Typically
        interval_mean or instantaneous, but additional types may be
        defined for events.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    interval_label : {'beginning', 'ending', 'instant', 'event'}
        Indicates if a time labels the beginning or the ending of an
        interval average, indicates an instantaneous value, or
        indicates an event.
    site : Site
        The site that this Observation was generated for.
    uncertainty : float or None
        A measure of the uncertainty of the observation values. The
        format will be determined later. None indicates that the
        uncertainty is unknown for this Observation.
    observation_id : str, optional
        UUID of the observation in the API
    provider : str, optional
        Provider of the Observation information.
    extra_parameters : str, optional
        Any extra parameters for the observation

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    """
    __blurb__: ClassVar[str] = 'Observation'
    name: str
    variable: str
    interval_value_type: str
    interval_length: pd.Timedelta
    interval_label: str
    site: Site
    uncertainty: Union[float, None]
    observation_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)

    def __post_init__(self):
        __set_units__(self)
        __check_interval_params__(self)


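# A construction sketch (`site` is an existing Site; values are
# hypothetical). units is not passed; __post_init__ derives it from
# the variable:
#
#   >>> obs = Observation(
#   ...     name='DR GHI', variable='ghi',
#   ...     interval_value_type='interval_mean',
#   ...     interval_length=pd.Timedelta('5min'),
#   ...     interval_label='beginning', site=site, uncertainty=1.0)
#   >>> obs.units
#   'W/m^2'

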
@dataclass(frozen=True)
class AggregateObservation(BaseModel):
    """
    Class for keeping track of an Observation and when it is added and
    (optionally) removed from an Aggregate. This metadata allows the
    Arbiter to calculate the correct quantities while the Aggregate
    grows or shrinks over time.

    Parameters
    ----------
    observation : Observation
        The Observation object that is part of the Aggregate
    effective_from : pandas.Timestamp
        The datetime of when the Observation should be included in the
        Aggregate
    effective_until : pandas.Timestamp
        The datetime of when the Observation should be excluded from
        the Aggregate
    observation_deleted_at : pandas.Timestamp
        The datetime that the Observation was deleted from the Arbiter.
        This indicates that the Observation should be removed from the
        Aggregate, and without the data from this Observation, the
        Aggregate is invalid before this time.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Observation`
    :py:class:`solarforecastarbiter.datamodel.Aggregate`
    """
    __blurb__: ClassVar[str] = 'Aggregate Observation'
    observation: Observation
    effective_from: pd.Timestamp
    effective_until: Union[pd.Timestamp, None] = None
    observation_deleted_at: Union[pd.Timestamp, None] = None


def __check_variable__(variable, *args):
    if not all(arg.variable == variable for arg in args):
        raise ValueError('All variables must be identical.')


def __check_aggregate_interval_compatibility__(interval, *args):
    if any(arg.interval_length > interval for arg in args):
        raise ValueError('observation.interval_length cannot be greater than '
                         'aggregate.interval_length.')
    if any(arg.interval_value_type not in ('interval_mean', 'instantaneous')
           for arg in args):
        raise ValueError('Only observations with interval_value_type of '
                         'interval_mean or instantaneous are acceptable')


@dataclass(frozen=True)
class Aggregate(BaseModel):
    """
    Class for keeping track of Aggregate metadata. Aggregates always
    have interval_value_type of 'interval_mean'.

    Parameters
    ----------
    name : str
        Name of the Aggregate, e.g. Utility X Solar PV
    description : str
        A description of what the aggregate is.
    variable : str
        Variable name, e.g. power, GHI. Each allowed variable has an
        associated pre-defined unit. All observations that make up the
        Aggregate must also have this variable.
    aggregate_type : str
        The aggregation function that will be applied to observations.
        Generally, this will be 'sum' although one might be interested,
        for example, in the 'mean' irradiance of some observations. May
        be an aggregate function string supported by Pandas. Common
        options include ('sum', 'mean', 'min', 'max', 'median', 'std').
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour. This must be >= the interval lengths of any
        Observations that will make up the Aggregate.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average.
    timezone : str
        IANA timezone of the Aggregate, e.g. Etc/GMT+8
    aggregate_id : str, optional
        UUID of the Aggregate in the API
    provider : str, optional
        Provider of the Aggregate information.
    extra_parameters : str, optional
        Any extra parameters for the Aggregate.
    observations : tuple of AggregateObservation
        The Observations that contribute to the Aggregate

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Observation`
    """
    __blurb__: ClassVar[str] = 'Aggregate'
    name: str
    description: str
    variable: str
    aggregate_type: str
    interval_length: pd.Timedelta
    interval_label: str
    timezone: str
    observations: Tuple[AggregateObservation, ...]
    aggregate_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)
    interval_value_type: str = field(default='interval_mean')

    def __post_init__(self):
        __set_units__(self)
        observations = [
            ao.observation for ao in self.observations
            if ao.observation is not None]
        __check_variable__(
            self.variable, *observations)
        __check_aggregate_interval_compatibility__(
            self.interval_length, *observations)
        __generic_oneof__(self, 'aggregate_type', ALLOWED_AGGREGATE_TYPES)
        __generic_oneof__(self, 'interval_label', ('beginning', 'ending'))
        object.__setattr__(self, 'interval_value_type', 'interval_mean')


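# A validation sketch (other fields elided; `obs_1h` is a hypothetical
# Observation with a 1 hour interval_length): member observations with
# an interval_length longer than the Aggregate's are rejected:
#
#   >>> Aggregate(..., interval_length=pd.Timedelta('15min'),
#   ...           observations=(AggregateObservation(
#   ...               observation=obs_1h,
#   ...               effective_from=pd.Timestamp('20200101T0000Z')),))
#   ValueError: observation.interval_length cannot be greater than ...

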
@dataclass(frozen=True)
class _ForecastBase:
    name: str
    issue_time_of_day: datetime.time
    lead_time_to_start: pd.Timedelta
    interval_length: pd.Timedelta
    run_length: pd.Timedelta
    interval_label: str
    interval_value_type: str
    variable: str


@dataclass(frozen=True)
class _ForecastDefaultsBase:
    site: Union[Site, None] = None
    aggregate: Union[Aggregate, None] = None
    forecast_id: str = ''
    provider: str = ''
    extra_parameters: str = ''
    units: str = field(init=False)


def __site_or_agg__(cls):
    if cls.site is not None and cls.aggregate is not None:
        raise KeyError('Only provide one of "site" or "aggregate" to '
                       'Forecast')
    elif cls.site is None and cls.aggregate is None:
        raise KeyError('Must provide one of "site" or "aggregate" to '
                       'Forecast')


# Follow the MRO pattern in https://stackoverflow.com/a/53085935/2802993
# to avoid problems with inheritance in ProbabilisticForecasts


@dataclass(frozen=True)
class Forecast(BaseModel, _ForecastDefaultsBase, _ForecastBase):
    """
    A class to hold metadata for Forecast objects.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : {'beginning', 'ending', 'instant'}
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI, event. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the Forecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Site`
    :py:class:`solarforecastarbiter.datamodel.Aggregate`
    """
    __blurb__: ClassVar[str] = 'Forecast'

    def __post_init__(self):
        __set_units__(self)
        __site_or_agg__(self)
        __check_interval_params__(self)

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != Forecast:
            return super().from_dict(dict_, raise_on_extra)
        if dict_.get('constant_value', None) is not None:
            return ProbabilisticForecastConstantValue.from_dict(
                dict_, raise_on_extra)
        elif dict_.get('constant_values', None) is not None:
            return ProbabilisticForecast.from_dict(dict_, raise_on_extra)
        elif dict_.get('variable') == 'event':
            return EventForecast.from_dict(dict_, raise_on_extra)
        else:
            return super().from_dict(dict_, raise_on_extra)


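# A construction sketch (`site` is an existing Site; values are
# hypothetical). Exactly one of site or aggregate must be given:
#
#   >>> fx = Forecast(
#   ...     name='DA GHI', issue_time_of_day=datetime.time(7, 0),
#   ...     lead_time_to_start=pd.Timedelta('1h'),
#   ...     interval_length=pd.Timedelta('1h'),
#   ...     run_length=pd.Timedelta('24h'), interval_label='beginning',
#   ...     interval_value_type='interval_mean', variable='ghi',
#   ...     site=site)
#   >>> fx.units
#   'W/m^2'

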
@dataclass(frozen=True)
class EventForecast(Forecast):
    """
    Extends Forecast dataclass to include event forecast attributes.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : {'event'}
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : {'event'}
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the Forecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.Forecast`
    """
    __blurb__: ClassVar[str] = 'Event Forecast'

    def __post_init__(self):
        if self.interval_label != "event":
            raise ValueError("Interval label must be 'event'")
        elif self.variable != "event":
            raise ValueError("Variable must be 'event'")
        super().__post_init__()


def __set_constant_value_units__(cls):
    if cls.axis == 'x':
        # e.g. Prob(obs < 10 MW). Forecast is in %, constant value
        # is 10 MW
        object.__setattr__(cls, 'constant_value_units', cls.units)
        object.__setattr__(cls, 'units', '%')
    else:
        # e.g. Prob(obs < fx) = 90%. Forecast is in units of the
        # observation, constant value is %
        object.__setattr__(cls, 'constant_value_units', '%')


@dataclass(frozen=True)
class _ProbabilisticForecastConstantValueBase:
    axis: str
    constant_value: float
    constant_value_units: str = field(init=False)


@dataclass(frozen=True)
class ProbabilisticForecastConstantValue(
        Forecast, _ProbabilisticForecastConstantValueBase):
    """
    Extends Forecast dataclass to include probabilistic forecast
    attributes.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    axis : str
        The axis on which the constant values of the CDF are specified.
        The axis can be either *x* (constant variable values) or *y*
        (constant percentiles).
    constant_value : float
        The variable value or percentile.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the ProbabilisticForecastConstantValue information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    :py:class:`solarforecastarbiter.datamodel.ProbabilisticForecast`
    """
    __blurb__: ClassVar[str] = 'Probabilistic Forecast Constant Value'

    def __post_init__(self):
        super().__post_init__()
        __check_axis__(self.axis)
        __set_constant_value_units__(self)


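# A units sketch (other fields elided): with axis='y' the forecast
# keeps the variable's units and the constant value is a percentile;
# with axis='x' the forecast becomes a probability in % and the
# constant value carries the variable's units:
#
#   >>> cv = ProbabilisticForecastConstantValue(
#   ...     ..., variable='ghi', axis='y', constant_value=50.0)
#   >>> cv.units, cv.constant_value_units
#   ('W/m^2', '%')

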
@dataclass(frozen=True)
class _ProbabilisticForecastBase:
    axis: str
    constant_values: Tuple[Union[ProbabilisticForecastConstantValue, float, int], ...]  # NOQA
    constant_value_units: str = field(init=False)


@dataclass(frozen=True)
class ProbabilisticForecast(
        Forecast, _ProbabilisticForecastBase):
    """
    Tracks a group of ProbabilisticForecastConstantValue objects that
    together describe one or more points of the same probability
    distribution.

    Parameters
    ----------
    name : str
        Name of the Forecast
    issue_time_of_day : datetime.time
        The time of day that a forecast run is issued, e.g. 00:30. For
        forecast runs issued multiple times within one day (e.g.
        hourly), this specifies the first issue time of day. Additional
        issue times are uniquely determined by the first issue time and
        the run length & issue frequency attribute. This is assumed to
        be a UTC time.
    lead_time_to_start : pandas.Timedelta
        The difference between the issue time and the start of the
        first forecast interval, e.g. 1 hour.
    interval_length : pandas.Timedelta
        The length of time between consecutive data points, e.g. 5
        minutes, 1 hour.
    run_length : pandas.Timedelta
        The total length of a single issued forecast run, e.g. 1 hour.
        To enforce a continuous, non-overlapping sequence, this is
        equal to the forecast run issue frequency.
    interval_label : str
        Indicates if a time labels the beginning or the ending of an
        interval average, or indicates an instantaneous value, e.g.
        beginning, ending, instant.
    interval_value_type : str
        The type of the data in the forecast, e.g. mean, max, 95th
        percentile.
    variable : str
        The variable in the forecast, e.g. power, GHI, DNI. Each
        variable is associated with a standard unit.
    site : Site or None
        The predefined site that the forecast is for, e.g. Power
        Plant X.
    aggregate : Aggregate or None
        The predefined aggregate that the forecast is for, e.g.
        Aggregate Y.
    axis : str
        The axis on which the constant values of the CDF are specified.
        The axis can be either *x* (constant variable values) or *y*
        (constant percentiles).
    constant_values : tuple of ProbabilisticForecastConstantValue or float
        The variable values or percentiles. Floats will automatically
        be converted to ProbabilisticForecastConstantValue objects.
    forecast_id : str, optional
        UUID of the forecast in the API
    provider : str, optional
        Provider of the ProbabilisticForecast information.
    extra_parameters : str, optional
        Extra configuration parameters of forecast.

    See Also
    --------
    ProbabilisticForecastConstantValue
    Forecast
    """
    __blurb__: ClassVar[str] = 'Probabilistic Forecast'

    def __post_init__(self):
        super().__post_init__()
        __check_axis__(self.axis)
        __set_constant_value_units__(self)
        __set_constant_values__(self)
        __check_axis_consistency__(self.axis, self.constant_values)


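# A promotion sketch (other fields elided): float constant_values are
# converted to ProbabilisticForecastConstantValue objects by
# __set_constant_values__ in __post_init__:
#
#   >>> pfx = ProbabilisticForecast(
#   ...     ..., axis='y', constant_values=(25.0, 50.0, 75.0))
#   >>> {type(cv).__name__ for cv in pfx.constant_values}
#   {'ProbabilisticForecastConstantValue'}

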
def __validate_cost__(index_var):
    def val(obj):
        if hasattr(obj, 'fill'):
            fillkeys = ALLOWED_COST_FILL_OPTIONS
            if obj.fill not in fillkeys:
                raise ValueError(
                    f"Cost 'fill' must be one of {str(fillkeys)}")
        if hasattr(obj, 'aggregation'):
            aggkeys = ALLOWED_COST_AGG_OPTIONS
            if obj.aggregation not in aggkeys:
                raise ValueError(
                    f"Cost 'aggregation' must be one of {str(aggkeys)}")
        if index_var is not None:
            if len(obj.cost) != len(getattr(obj, index_var)):
                raise ValueError(
                    f"'cost' and '{index_var}' must have the same length")
    return val


@dataclass(frozen=True)
class TimeOfDayCost(BaseModel):
    """Cost values based on the time of day.

    Parameters
    ----------
    times : tuple of datetime.time
        The times to associate with each cost value
    cost : tuple of float
        The cost per unit error of the forecasted variable for each
        time. Must have the same length as `times`.
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    fill : str
        Fill method to apply for times between those specified in
        `times`. Options are 'forward' or 'backward'.
    timezone : str, default None
        IANA timezone string to use when constructing datetimes. If
        None, the timezone of the observations is used, which is the
        report timezone when calculated in a report.
    """
    times: Tuple[datetime.time, ...]
    cost: Tuple[float, ...]
    aggregation: str
    net: bool
    fill: str
    timezone: str = None

    __post_init__ = __validate_cost__('times')


@dataclass(frozen=True)
class DatetimeCost(BaseModel):
    """Cost values based on datetimes.

    Parameters
    ----------
    datetimes : tuple/iterable of datetime-like objects
        The datetimes to associate with each cost value
    cost : tuple of float
        The cost per unit error of the forecasted variable for each
        datetime. Must have the same length as `datetimes`.
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    fill : str
        Fill method to apply for datetimes between those specified in
        `datetimes`. Options are 'forward' or 'backward'.
    timezone : str, default None
        IANA timezone string to use when constructing datetimes. If
        None, the timezone of the observations is used, which is the
        report timezone when calculated in a report.
    """
    datetimes: Tuple[pd.Timestamp, ...]
    cost: Tuple[float, ...]
    aggregation: str
    net: bool
    fill: str
    timezone: str = None

    __post_init__ = __validate_cost__('datetimes')


@dataclass(frozen=True)
class ConstantCost(BaseModel):
    """A constant cost per unit error of the forecasted variable

    Parameters
    ----------
    cost : float
    aggregation : str
        Aggregation method to use after calculating cost for the error
        series. Currently only 'sum' or 'mean' are available.
    net : bool
        If True, compute the 'net' aggregate error instead of first
        calculating the absolute error before performing the
        aggregation.
    """
    cost: float
    aggregation: str
    net: bool

    __post_init__ = __validate_cost__(None)


@dataclass(frozen=True)
class CostBand(BaseModel):
    """Cost specification for one error band

    Parameters
    ----------
    error_range : tuple(float, float)
        Bounds of the error to apply the specified cost function to.
        Inf and -Inf are valid range points, and the error may be
        positive or negative. Inclusion/exclusion of endpoints is
        determined by ordering in
        :py:class:`solarforecastarbiter.datamodel.ErrorBandCost`.
    cost_function : str
        One of 'timeofday', 'datetime', or 'constant'. Specifies which
        cost model should be used to calculate the cost in this band.
    cost_function_parameters : :py:class:`solarforecastarbiter.datamodel.ConstantCost` or :py:class:`solarforecastarbiter.datamodel.TimeOfDayCost` or :py:class:`solarforecastarbiter.datamodel.DatetimeCost`
        Parameters for the selected cost function.
    """  # NOQA: E501
    error_range: Tuple[float, float]
    cost_function: str
    cost_function_parameters: Union[TimeOfDayCost, DatetimeCost, ConstantCost]

    def _special_field_processing(self, model_field, val):
        # support passing "inf", "-inf" as strings via json/dict
        if model_field.name == 'error_range':
            return float(val)
        else:  # pragma: no cover
            return val

    def __post_init__(self):
        if self.cost_function == 'timeofday':
            if not isinstance(self.cost_function_parameters, TimeOfDayCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "TimeOfDayCost for 'timeofday' cost function.")
        elif self.cost_function == 'datetime':
            if not isinstance(self.cost_function_parameters, DatetimeCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "DatetimeCost for 'datetime' cost function.")
        elif self.cost_function == 'constant':
            if not isinstance(self.cost_function_parameters, ConstantCost):
                raise TypeError(
                    "'cost_function_parameters' must be of type "
                    "ConstantCost for 'constant' cost function.")
        else:
            raise ValueError(
                "'cost_function' must be one of 'timeofday', 'datetime', or"
                " 'constant'")


@dataclass(frozen=True)
class ErrorBandCost(BaseModel):
    """Cost that varies based on the error value. For each error band,
    one of the other cost functions is applied to the errors within the
    band. If an error value does not fall within any band ranges, no
    cost is calculated for that error.

    Parameters
    ----------
    bands : tuple of :py:class:`solarforecastarbiter.datamodel.CostBand`
        Specification of the error bands and associated cost functions.

    Notes
    -----
    Each error is restricted to a single band/cost function, so the
    ordering of `bands` determines priority. For example, if
    ``bands[0].error_range == (0, 2)`` and
    ``bands[1].error_range == (1, 3)``, the cost function of bands[0]
    is applied for all errors from [0, 2] and bands[1] is applied for
    errors from (2, 3].
    """
    bands: Tuple[CostBand, ...]


@dataclass(frozen=True)
class Cost(BaseModel):
    """Specify how cost metrics should be calculated.

    Parameters
    ----------
    name : str
        Identifier for these cost parameters
    type : str
        The type of cost parameters that are included in `parameters`.
        One of 'timeofday', 'datetime', 'constant', or 'errorband'.
    parameters : :py:class:`solarforecastarbiter.datamodel.ConstantCost` or :py:class:`solarforecastarbiter.datamodel.TimeOfDayCost` or :py:class:`solarforecastarbiter.datamodel.DatetimeCost` or :py:class:`solarforecastarbiter.datamodel.ErrorBandCost`
        Parameters for the specific cost function type.
    """  # NOQA: E501
    name: str
    type: str
    parameters: Union[TimeOfDayCost, DatetimeCost, ConstantCost,
                      ErrorBandCost]

    def __post_init__(self):
        if self.type not in ALLOWED_COST_FUNCTIONS:
            raise ValueError(
                f"'type' must be one of {ALLOWED_COST_FUNCTIONS}")

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        type_ = dict_['type']
        param_dict = dict_.get('parameters', {})
        if type_ == 'timeofday':
            dict_['parameters'] = TimeOfDayCost.from_dict(param_dict)
        elif type_ == 'datetime':
            dict_['parameters'] = DatetimeCost.from_dict(param_dict)
        elif type_ == 'constant':
            dict_['parameters'] = ConstantCost.from_dict(param_dict)
        elif type_ == 'errorband':
            dict_['parameters'] = ErrorBandCost.from_dict(param_dict)
        else:
            raise ValueError(
                f"'type' must be one of {ALLOWED_COST_FUNCTIONS}")
        return super().from_dict(dict_, raise_on_extra)


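# A from_dict sketch (hypothetical values): the 'type' key selects the
# parameter dataclass used to parse 'parameters':
#
#   >>> cost = Cost.from_dict({
#   ...     'name': 'flat', 'type': 'constant',
#   ...     'parameters': {'cost': 1.5, 'aggregation': 'sum',
#   ...                    'net': False}})
#   >>> type(cost.parameters).__name__
#   'ConstantCost'

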
def __set_constant_values__(self):
    out = []
    for cv in self.constant_values:
        if isinstance(cv, ProbabilisticForecastConstantValue):
            out.append(cv)
        elif isinstance(cv, (float, int)):
            cv_dict = self.to_dict()
            cv_dict.pop('forecast_id', None)
            cv_dict['constant_value'] = cv
            out.append(
                ProbabilisticForecastConstantValue.from_dict(cv_dict))
        else:
            raise TypeError(
                f'Invalid type for a constant value {cv}. Must be int, '
                'float, or ProbabilisticForecastConstantValue')
    object.__setattr__(self, 'constant_values', tuple(out))


def __check_axis__(axis):
    if axis not in ('x', 'y'):
        raise ValueError('Axis must be x or y')


def __check_axis_consistency__(axis, constant_values):
    if not all(arg.axis == axis for arg in constant_values):
        raise ValueError('All axis attributes must be identical')


def __check_units__(*args):
    if len(args) == 0:
        return
    unique_units = set()
    for arg in args:
        if getattr(arg, 'axis', None) == 'x':
            unique_units.add(arg.constant_value_units)
        else:
            unique_units.add(arg.units)
    if len(unique_units) > 1:
        raise ValueError('All units must be identical.')


def __check_interval_compatibility__(forecast, observation):
    if observation.interval_length > forecast.interval_length:
        raise ValueError('observation.interval_length cannot be greater than '
                         'forecast.interval_length.')
    if ('instant' in forecast.interval_label and
            'instant' not in observation.interval_label):
        raise ValueError('Instantaneous forecasts cannot be evaluated against '
                         'interval average observations.')


@dataclass(frozen=True)
class ForecastObservation(BaseModel):
    """
    Class for pairing Forecast and Observation objects for evaluation.

    Parameters
    ----------
    forecast : :py:class:`solarforecastarbiter.datamodel.Forecast`
    observation : :py:class:`solarforecastarbiter.datamodel.Observation`
    reference_forecast : :py:class:`solarforecastarbiter.datamodel.Forecast` or None
    normalization : float or None
        If None, determined by __set_normalization__
    uncertainty : None, float, or str
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%. If str, may be
        'observation_uncertainty' to indicate that the value should be
        set to ``observation.uncertainty``, or may be coerceable to a
        float.
    cost : str or None
        Cost parameters to use from the costs associated with
        ReportParameters
    """  # NOQA
    forecast: Forecast
    observation: Observation
    reference_forecast: Union[Forecast, None] = None
    # some function applied to observation (e.g. mean per day)
    # possible in future. maybe add pd.Series like for
    # ProcessedForecastObservation
    normalization: Union[float, None] = None
    uncertainty: Union[None, float, str] = None
    cost: Union[str, None] = None
    data_object: Observation = field(init=False)

    def __post_init__(self):
        __set_normalization__(self)
        __set_uncertainty__(self)
        object.__setattr__(self, 'data_object', self.observation)
        __check_units__(self.forecast, self.data_object)
        __check_interval_compatibility__(self.forecast, self.data_object)


def __set_normalization__(self):
    if self.normalization is None:
        if self.observation.variable == 'ac_power':
            norm = self.observation.site.modeling_parameters.ac_capacity
        elif self.observation.variable == 'dc_power':
            norm = self.observation.site.modeling_parameters.dc_capacity
        elif self.observation.units == 'W/m^2':
            # normalizing by 1000 W/m^2 was considered and rejected
            # https://github.com/SolarArbiter/solarforecastarbiter-core/pull/379#discussion_r402434134
            # keep W/m^2 as a separate item for likely future improvements
            norm = np.nan
        else:
            norm = np.nan
    else:
        # norm was supplied, but we're going to make sure it can be
        # coerced to a float
        norm = self.normalization
    norm = float(norm)  # from_dict only checks for floats, chokes on ints
    object.__setattr__(self, 'normalization', norm)


def __set_aggregate_normalization__(self):
    # https://github.com/SolarArbiter/solarforecastarbiter-core/issues/381
    norm = np.nan
    object.__setattr__(self, 'normalization', norm)


def __set_uncertainty__(self):
    if isinstance(self.uncertainty, str):
        try:
            unc = float(self.uncertainty)
        except ValueError:
            if self.uncertainty == 'observation_uncertainty':
                object.__setattr__(
                    self, 'uncertainty', self.observation.uncertainty)
            else:
                # easy to mistype 'observation_uncertainty', so be helpful
                raise ValueError(
                    ('Invalid uncertainty %s. uncertainty must be set to '
                     'None, a float, or "observation_uncertainty"')
                    % self.uncertainty)
        else:
            object.__setattr__(self, 'uncertainty', unc)


@dataclass(frozen=True)
class ForecastAggregate(BaseModel):
    """
    Class for pairing Forecast and Aggregate objects for evaluation.

    Parameters
    ----------
    forecast : :py:class:`solarforecastarbiter.datamodel.Forecast`
    aggregate : :py:class:`solarforecastarbiter.datamodel.Aggregate`
    reference_forecast : :py:class:`solarforecastarbiter.datamodel.Forecast` or None
    normalization : float or None
        If None, assigned 1.
    uncertainty : None, float, or str
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%. Strings must be
        coerceable to a float.
    cost : str or None
        Cost parameters to use from the costs associated with
        ReportParameters
    """  # NOQA
    forecast: Forecast
    aggregate: Aggregate
    reference_forecast: Union[Forecast, None] = None
    normalization: Union[float, None] = None
    uncertainty: Union[float, None] = None
    cost: Union[str, None] = None
    data_object: Aggregate = field(init=False)

    def __post_init__(self):
        if self.normalization is None:
            __set_aggregate_normalization__(self)
        if self.uncertainty is not None:
            object.__setattr__(self, 'uncertainty', float(self.uncertainty))
        object.__setattr__(self, 'data_object', self.aggregate)
        __check_units__(self.forecast, self.data_object)
        __check_interval_compatibility__(self.forecast, self.data_object)


@dataclass(frozen=True)
class BaseFilter(BaseModel):
    """
    Base class for filters to be applied in a report.
    """
    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != BaseFilter:
            return super().from_dict(dict_, raise_on_extra)
        if 'quality_flags' in dict_:
            return QualityFlagFilter.from_dict(dict_, raise_on_extra)
        elif 'time_of_day_range' in dict_:
            return TimeOfDayFilter.from_dict(dict_, raise_on_extra)
        elif 'value_range' in dict_:
            return ValueFilter.from_dict(dict_, raise_on_extra)
        else:
            raise NotImplementedError(
                'Do not know how to process dict into a Filter.')


@dataclass(frozen=True)
class QualityFlagFilter(BaseFilter):
    """Quality flag filters to be applied in a report.

    Parameters
    ----------
    quality_flags : Tuple of str
        Strings corresponding to ``BITMASK_DESCRIPTION_DICT`` or
        ``DERIVED_MASKS`` keys. These periods will be excluded from the
        analysis.
    discard_before_resample : bool, default True
        Determines if points should be discarded before resampling or
        only during resampling (when
        ``resample_threshold_percentage`` is exceeded).
    resample_threshold_percentage : float, default 10.
        The percentage of points in a resampled interval that must be
        flagged for the resampled interval to be flagged.

    Notes
    -----
    If ``discard_before_resample`` is ``True``, the ``quality_flags``
    are applied to the data before it is resampled. During resampling,
    intervals are discarded if ``resample_threshold_percentage`` is
    exceeded.

    If ``discard_before_resample`` is ``False``, the ``quality_flags``
    are only considered during the resampling operation. The
    ``quality_flags`` of the raw observations are combined with ``OR``,
    the total number of flagged points within a resample period is
    computed, and intervals are discarded where
    ``resample_threshold_percentage`` is exceeded.
    """
    quality_flags: Tuple[str, ...] = (
        'UNEVEN FREQUENCY', 'LIMITS EXCEEDED', 'CLEARSKY EXCEEDED',
        'DAYTIME STALE VALUES', 'INCONSISTENT IRRADIANCE COMPONENTS'
    )
    discard_before_resample: bool = True
    resample_threshold_percentage: float = 10.

    def __post_init__(self):
        if not all(flag in ALLOWED_QUALITY_FLAGS
                   for flag in self.quality_flags):
            raise ValueError('Quality flags must be in '
                             'BITMASK_DESCRIPTION_DICT or DERIVED_MASKS')


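# A validation sketch: flags outside ALLOWED_QUALITY_FLAGS are
# rejected in __post_init__:
#
#   >>> QualityFlagFilter(quality_flags=('NOT A REAL FLAG',))
#   ValueError: Quality flags must be in BITMASK_DESCRIPTION_DICT ...

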
@dataclass(frozen=True)
class TimeOfDayFilter(BaseFilter):
    """
    Class representing a time of day filter to be applied in a report.

    Parameters
    ----------
    time_of_day_range : (datetime.time, datetime.time) tuple
        Time of day range to calculate errors. Range is inclusive of
        both endpoints. Do not use this to exclude nighttime; instead
        set the corresponding quality_flag.
    """
    time_of_day_range: Tuple[datetime.time, datetime.time]
    # add these?
    # discard_before_resample: bool = True
    # resample_threshold_percentage: float = 10.


@dataclass(frozen=True)
class ValueFilter(BaseFilter):
    """
    Class representing an observation or forecast value filter to be
    applied in a report.

    Parameters
    ----------
    metadata : :py:class:`solarforecastarbiter.datamodel.Forecast` or :py:class:`solarforecastarbiter.datamodel.Observation`
        Object to get values for.
    value_range : (float, float) tuple
        Value range to calculate errors. Range is inclusive of both
        endpoints. Filters are applied before resampling.
    """  # NOQA
    # TODO: implement. Also add Aggregate
    metadata: Union[Observation, Forecast]
    value_range: Tuple[float, float]
    # add these?
    # discard_before_resample: bool = True
    # resample_threshold_percentage: float = 10.


def __check_metrics__(fx, metrics):
    """Validate metrics selection.

    Check that the selected metrics are valid for the given scenario
    (e.g. if deterministic forecasts, then deterministic metrics).

    Parameters
    ----------
    fx : Forecast, ProbabilisticForecast, ProbabilisticForecastConstantValue
        Forecast to be evaluated by metrics.
    metrics : Tuple of str
        Metrics to be computed in the report.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the selected metrics are not valid for the given forecast
        type.
    """
    if isinstance(fx, (ProbabilisticForecast,
                       ProbabilisticForecastConstantValue)):
        if not set(metrics) <= ALLOWED_PROBABILISTIC_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_PROBABILISTIC_METRICS.")
    elif isinstance(fx, EventForecast):
        if not set(metrics) <= ALLOWED_EVENT_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_EVENT_METRICS.")
    elif isinstance(fx, Forecast):
        if not set(metrics) <= ALLOWED_DETERMINISTIC_METRICS.keys():
            raise ValueError("Metrics must be in "
                             "ALLOWED_DETERMINISTIC_METRICS.")


def __check_categories__(categories):
    if not set(categories) <= ALLOWED_CATEGORIES.keys():
        raise ValueError('Categories must be in ALLOWED_CATEGORIES')


@dataclass(frozen=True)
class ValidationResult(BaseModel):
    """Store the validation result for a flag or combination of flags.

    Parameters
    ----------
    flag : str
        The quality flag(s) being recorded. See
        :py:mod:`solarforecastarbiter.validation.quality_mapping`.
    count : int
        The number of timestamps that were flagged.
    before_resample : bool
        If the flag was applied before resampling.
    """
    flag: str
    count: int
    before_resample: bool = True


@dataclass(frozen=True)
class PreprocessingResult(BaseModel):
    """Stores summary information to record preprocessing results that
    detail how data has been handled.

    Parameters
    ----------
    name : str
        The human readable name noting the process and data applied.
    count : int
        The number of timestamps that were managed in the process.
    """
    name: str
    count: int


# need to apply filtering + resampling to each forecast obs pair
@dataclass(frozen=True)
class ProcessedForecastObservation(BaseModel):
    """
    Hold the processed forecast and observation data with the
    resampling parameters.

    Parameters
    ----------
    name : str
    original : :py:class:`solarforecastarbiter.datamodel.ForecastObservation` or :py:class:`solarforecastarbiter.datamodel.ForecastAggregate`
    interval_value_type : str
    interval_length : pandas.Timedelta
    interval_label : str
    valid_point_count : int
        The number of valid points in the processed forecast.
    forecast_values : pandas.Series or str or None
        The values of the forecast, the forecast id, or None.
    observation_values : pandas.Series or str or None
        The values of the observation, the observation or aggregate id,
        or None.
    reference_forecast_values : pandas.Series or str or None
        The values of the reference forecast, the reference forecast
        id, or None.
    validation_results : tuple of :py:class:`solarforecastarbiter.datamodel.ValidationResult`
    preprocessing_results : tuple of :py:class:`solarforecastarbiter.datamodel.PreprocessingResult`
    normalization_factor : pandas.Series or float
    uncertainty : None or float
        If None, uncertainty is not accounted for. Float specifies the
        uncertainty as a percentage from 0 to 100%.
    cost : :py:class:`solarforecastarbiter.datamodel.Cost` or None
        The parameters to use when calculating cost metrics.
    """  # NOQA
    name: str
    # do this instead of subclass to compare objects later
    original: Union[ForecastObservation, ForecastAggregate]
    interval_value_type: str
    interval_length: pd.Timedelta
    interval_label: str
    valid_point_count: int
    forecast_values: Union[pd.Series, str, None]
    observation_values: Union[pd.Series, str, None]
    reference_forecast_values: Union[pd.Series, str, None] = None
    validation_results: Tuple[ValidationResult, ...] = ()
    preprocessing_results: Tuple[PreprocessingResult, ...] = ()
    # This may need to be a series, e.g. normalize by the average
    # observed value per day. Hence, repeat here instead of
    # only in original
    normalization_factor: Union[pd.Series, float] = 1.0
    uncertainty: Union[None, float] = None
    cost: Union[Cost, None] = None


@dataclass(frozen=True)
class MetricValue(BaseModel):
    """Class for storing the result of a single metric calculation.

    Parameters
    ----------
    category : str
        The category of the metric value, e.g. total, monthly, hourly.
    metric : str
        The metric that was calculated.
    index : str
        The index of the metric value, e.g. '1'-'12' for monthly
        metrics or '0'-'23' for hourly.
    value : float
        The value calculated for the metric.
    """
    category: str
    metric: str
    index: str
    value: float


@dataclass(frozen=True)
class MetricResult(BaseModel):
    """Class for storing the results of many metric calculations for a
    single observation and forecast pair.

    Parameters
    ----------
    name : str
        A descriptive name for the MetricResult.
    forecast_id : str
        UUID of the forecast being analyzed.
    values : tuple of :py:class:`solarforecastarbiter.datamodel.MetricValue`
        The computed metric values.
    observation_id : str or None
        UUID of the observation being analyzed.
    aggregate_id : str or None
        UUID of the aggregate being analyzed.
    is_summary : bool
        If this metric result represents summary statistics of the
        observation and forecast timeseries.

    Notes
    -----
    Only one of `aggregate_id` or `observation_id` may be set.

    Raises
    ------
    ValueError
        When both `aggregate_id` and `observation_id` are not None, or
        when both are None.
    """
    name: str
    forecast_id: str
    values: Tuple[MetricValue, ...]
    observation_id: Union[str, None] = None
    aggregate_id: Union[str, None] = None
    is_summary: bool = False

    def __post_init__(self):
        if (
                (self.observation_id is None and self.aggregate_id is None)
                or (
                    self.observation_id is not None and
                    self.aggregate_id is not None
                )
        ):
            raise ValueError(
                'One of observation_id OR aggregate_id must be set')


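# A validation sketch (hypothetical IDs): exactly one of
# observation_id or aggregate_id must be set:
#
#   >>> MetricResult(name='pair 1', forecast_id='fx-uuid', values=(),
#   ...              observation_id=None, aggregate_id=None)
#   ValueError: One of observation_id OR aggregate_id must be set

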
def __check_plot_spec__(plot_spec):
    """Ensure that the provided plot specification is a valid JSON
    object"""
    try:
        spec_dict = json.loads(plot_spec)
        validate(instance=spec_dict, schema={'type': 'object'})
    except (json.JSONDecodeError, ValidationError):
        raise ValueError('Figure spec must be a valid json object.')


@dataclass(frozen=True)
class ReportFigure(BaseModel):
    """Parent class for different types of Report Figures"""
    def __post_init__(self):
        if type(self) == ReportFigure:
            raise ValueError("Invalid Report Figure. Figures must be of "
                             "class PlotlyReportFigure or "
                             "BokehReportFigure.")

    @classmethod
    def from_dict(model, input_dict, raise_on_extra=False):
        dict_ = input_dict.copy()
        if model != ReportFigure:
            return super().from_dict(dict_, raise_on_extra)
        figure_class = dict_.get('figure_class')
        if figure_class == 'plotly':
            return PlotlyReportFigure.from_dict(dict_, raise_on_extra)
        elif figure_class == 'bokeh':
            return BokehReportFigure.from_dict(dict_, raise_on_extra)
        else:
            raise NotImplementedError(
                'Do not know how to process dict into a ReportFigure.')


@dataclass(frozen=True)
class PlotlyReportFigure(ReportFigure):
    """A class for storing metric plots for a report with associated
    metadata.

    Parameters
    ----------
    name : str
        A descriptive name for the figure.
    spec : str
        JSON string representation of the plotly plot.
    figure_type : str
        The type of plot, e.g. bar or scatter.
    pdf : str
        A static PDF copy of the plot, for including in PDF reports.
    svg : str
        Deprecated in favor of pdf. A static svg copy of the plot.
    category : str
        The metric category. One of ALLOWED_CATEGORIES keys.
    metric : str
        The metric being plotted.
    """
    name: str
    spec: str
    figure_type: str
    pdf: str = ''
    svg: str = ''
    category: str = ''
    metric: str = ''
    figure_class: str = 'plotly'

    def __post_init__(self):
        __check_plot_spec__(self.spec)


@dataclass(frozen=True)
class BokehReportFigure(ReportFigure):
    """A class for storing metric plots for a report with associated
    metadata.

    Parameters
    ----------
    name : str
        A descriptive name for the figure.
    div : str
        An HTML div element to be the target of the Bokeh javascript.
    svg : str
        A static svg copy of the plot, for including in the pdf
        version.
    figure_type : str
        The type of plot, e.g. bar or scatter.
    category : str
        The metric category. One of ALLOWED_CATEGORIES keys.
    metric : str
        The metric being plotted.
    """
    name: str
    div: str
    svg: str
    figure_type: str
    category: str = ''
    metric: str = ''
    figure_class: str = 'bokeh'


def __bokeh_or_plotly__(cls):
    if cls.bokeh_version is not None and cls.plotly_version is not None:
        raise KeyError('Only provide one of "bokeh_version" or '
                       '"plotly_version" to RawReportPlots')
    elif cls.bokeh_version is None and cls.plotly_version is None:
        raise KeyError('Must provide one of "bokeh_version" or '
                       '"plotly_version" to RawReportPlots')


@dataclass(frozen=True)
class RawReportPlots(BaseModel):
    """Class for storing collection of all metric plots on a raw
    report.

    Parameters
    ----------
    figures : tuple of :py:class:`solarforecastarbiter.datamodel.ReportFigure`
    plotly_version : str or None
        The plotly version used when generating metrics plots.
    bokeh_version : str or None
        The bokeh version used when generating metrics plots, if any.
    script : str or None
        Javascript for rendering Bokeh figures; required when
        bokeh_version is set.
    """  # NOQA
    figures: Tuple[ReportFigure, ...]
    plotly_version: Union[str, None] = None
    bokeh_version: Union[str, None] = None
    script: Union[str, None] = None

    def __post_init__(self):
        __bokeh_or_plotly__(self)
        if self.bokeh_version is not None:
            if self.script is None:
                raise KeyError('Must provide script for Bokeh plots to '
                               'RawReportPlots')


@dataclass(frozen=True)
class ReportMessage(BaseModel):
    """Class for intercepting errors and warnings associated with
    report processing.

    Parameters
    ----------
    message : str
    step : str
    level : str
    function : str
        The function where the error originated.
    """
    message: str
    step: str
    level: str
    function: str


@dataclass(frozen=True)
class TimePeriod(BaseModel):
    """Class for storing a generic time period. For example, a report
    outage.

    Parameters
    ----------
    start : pandas.Timestamp
        Start time of the time period.
    end : pandas.Timestamp
        End time of the time period.
    """
    start: pd.Timestamp
    end: pd.Timestamp


@dataclass(frozen=True)
class RawReport(BaseModel):
    """Class for holding the result of processing a report request
    including some metadata, the calculated metrics, plots, the
    processed forecast/observation data, and messages from report
    generation. This is called a "raw" report because this object,
    along with the report parameters, can be processed into a HTML or
    PDF report.

    Parameters
    ----------
    generated_at : pandas.Timestamp
        The time at report computation.
    timezone : str
        The IANA timezone of the report.
    versions : tuple of (str, str) tuples
        Pairs of package name and version to ensure the correct version
        of the core library is used when rendering or recomputing the
        report.
    plots : :py:class:`solarforecastarbiter.datamodel.RawReportPlots`
    metrics : tuple of :py:class:`solarforecastarbiter.datamodel.MetricResult`
    processed_forecasts_observations : tuple of :py:class:`solarforecastarbiter.datamodel.ProcessedForecastObservation`
    messages : tuple of :py:class:`solarforecastarbiter.datamodel.ReportMessage`
    data_checksum : str or None
        SHA-256 checksum of the raw data used in the report.
    outages : Tuple[TimePeriod, ...], optional
        List of report outage periods used when this raw report was
        generated.
    """  # NOQA
    generated_at: pd.Timestamp
    timezone: str
    versions: Tuple[Tuple[str, str], ...]
    plots: RawReportPlots
    metrics: Tuple[MetricResult, ...]
    processed_forecasts_observations: Tuple[ProcessedForecastObservation, ...]
    messages: Tuple[ReportMessage, ...] = ()
    data_checksum: Union[str, None] = None
    outages: Tuple[TimePeriod, ...] = ()


def __check_cost_consistency__(object_pairs, available_costs):
    cost_names = [ac.name for ac in available_costs]
    for op in object_pairs:
        if op.cost is not None and op.cost not in cost_names:
            raise ValueError(
                f'Object pair cost, {op.cost}, not present in cost '
                'parameters specified here')


@dataclass(frozen=True)
class ReportParameters(BaseModel):
    """Parameters required to define and generate a Report.

    Parameters
    ----------
    name : str
        Name of the report.
    start : pandas.Timestamp
        Start time of the reporting period.
    end : pandas.Timestamp
        End time of the reporting period.
    forecast_fill_method : {'drop', 'forward', float}
        Indicates what process to use for handling missing forecasts.
    object_pairs : Tuple of ForecastObservation or ForecastAggregate
        Paired Forecasts and Observations or Aggregates to be analyzed
        in the report.
    metrics : Tuple of str
        Metrics to be computed in the report.
    categories : Tuple of str
        Categories to compute and organize metrics over in the report.
    filters : Tuple of Filters
        Filters to be applied to the data in the report.
    costs : Tuple of Costs
        Cost parameters that can be referenced in `object_pairs` to
        compute cost metrics for that pair. Each object pair must have
        the 'cost' parameter set to None (no cost calculation will be
        performed) or one of the names of these costs.
    timezone : str or None
        The timezone in which to compute daily, hourly, etc.
        statistics. If None, inferred from data in object_pairs.
    """
    name: str
    start: pd.Timestamp
    end: pd.Timestamp
    object_pairs: Tuple[Union[ForecastObservation, ForecastAggregate], ...]
    metrics: Tuple[str, ...] = ('mae', 'mbe', 'rmse')
    categories: Tuple[str, ...] = ('total', 'date', 'hour')
    forecast_fill_method: str = 'drop'
    filters: Tuple[BaseFilter, ...] = field(
        default_factory=lambda: (QualityFlagFilter(), ))
    costs: Tuple[Cost, ...] = tuple()
    timezone: Optional[str] = None

    def __post_init__(self):
        # ensure that all forecast and observation units are the same
        __check_units__(*itertools.chain.from_iterable(
            (k.forecast, k.data_object) for k in self.object_pairs))
        # ensure the metrics can be applied to the forecasts and
        # observations
        for k in self.object_pairs:
            __check_metrics__(k.forecast, self.metrics)
        # ensure that categories are valid
        __check_categories__(self.categories)
        __check_cost_consistency__(self.object_pairs, self.costs)


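# A minimal sketch (`pair` is a hypothetical ForecastObservation):
# defaults supply the metrics, categories, and filters, while
# __post_init__ validates units, metrics, categories, and cost names:
#
#   >>> params = ReportParameters(
#   ...     name='Example report',
#   ...     start=pd.Timestamp('20200101T0000Z'),
#   ...     end=pd.Timestamp('20200201T0000Z'),
#   ...     object_pairs=(pair,))
#   >>> params.metrics
#   ('mae', 'mbe', 'rmse')

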
@dataclass(frozen=True)
class Report(BaseModel):
    """Class for keeping track of report metadata and the raw report
    that can later be rendered to HTML or PDF. Functions in
    :py:mod:`~solarforecastarbiter.reports.main` take a Report object
    with `raw_report` set to None, generate the report, and return
    another Report object with `raw_report` set to a RawReport object
    that can be rendered.

    Parameters
    ----------
    report_parameters : ReportParameters
        Metadata required to specify and generate the report.
    raw_report : RawReport or None
        Once computed, the raw report should be stored here.
    status : str
        Status of the report.
    report_id : str
        ID of the report in the API.
    provider : str, optional
        Provider of the Report information.
    outages : Tuple[TimePeriod, ...], optional
        List of report outage periods.
    __version__ : int
        Should be used to version reports to ensure even older reports
        can be properly rendered.
    """
    report_parameters: ReportParameters
    raw_report: Union[None, RawReport] = None
    status: str = 'pending'
    report_id: str = ''
    provider: str = ''
    outages: Tuple[TimePeriod, ...] = ()
    __version__: int = 0  # should add version to api


FORECAST_TYPE_MAPPING = {
    'forecast': Forecast,
    'event_forecast': EventForecast,
    'probabilistic_forecast': ProbabilisticForecast,
    'probabilistic_forecast_constant_value':
        ProbabilisticForecastConstantValue
}