Source code for solarforecastarbiter.reports.figures.bokeh_figures

"""
Functions to make all of the figures for Solar Forecast Arbiter reports using
Bokeh.

This code is currently unreachable from the rest of the Solar Forecast Arbiter
Core library. It may be used in place of the plotly_figures to generate bokeh
plots for the `plots` attribute of the RawReport object. See
:py:mod:`solarforecastarbiter.reports.main` for an example of report
generation.
"""
import calendar
from contextlib import contextmanager
import datetime as dt
from itertools import cycle
import logging
import warnings


from bokeh.embed import components
from bokeh.io.export import get_svgs
from bokeh.layouts import gridplot
from bokeh.models import (ColumnDataSource, HoverTool, Legend,
                          DatetimeTickFormatter, CategoricalTickFormatter,
                          CDSView, GroupFilter, BooleanFilter)
from bokeh.models.ranges import Range1d, FactorRange, DataRange1d
from bokeh.plotting import figure
from bokeh.transform import factor_cmap, dodge
from bokeh import palettes
from bokeh import __version__ as bokeh_version
import pandas as pd
import numpy as np


from solarforecastarbiter import datamodel
from solarforecastarbiter.plotting.utils import line_or_step


logger = logging.getLogger(__name__)
PALETTE = (
    palettes.d3['Category20'][20][::2] + palettes.d3['Category20'][20][1::2])
_num_obs_colors = 3
# drop white
OBS_PALETTE = list(palettes.grey(_num_obs_colors + 1)[0:_num_obs_colors])
OBS_PALETTE.reverse()
OBS_PALETTE_TD_RANGE = pd.timedelta_range(
    freq='10min', end='60min', periods=_num_obs_colors)


[docs]def construct_timeseries_cds(report): """Construct two standardized Bokeh CDS for the timeseries and scatter plot functions. One with timeseries data for all observations, aggregates, and forecasts in the report, and the other with associated metadata sharing a common `pair_index` key. Parameters ---------- report: :py:class:`solarforecastarbiter.datamodel.Report` Returns ------- value_cds : bokeh.models.ColumnDataSource Keys are an integer `pair_index` for pairing values with the metadata in the metadata_cds, and two pandas.Series, `observation_values` and `forecast_values`. metadata_cds : bokeh.models.ColumnDataSource This cds has the following keys: - `pair_index`: Integer for pairing metadata with the values in the value_cds. - `observation_name`: Observation name. - `forecast_name`: Forecast name. - `interval_label`: Interval label of the processed forecast and observation data. - `observation_hash`: Hash of the original observation object and the `datamodel.ProcessedForecastObservations` metadata. - `forecast_hash`: Hash of the original forecast object and the `datamodel.ProcessedForecastObservations` metadata. """ # NOQA value_frames = [] meta_rows = [] for idx, pfxobs in enumerate( report.raw_report.processed_forecasts_observations): value_frame_dict = { 'pair_index': idx, 'observation_values': pfxobs.observation_values, 'forecast_values': pfxobs.forecast_values, } meta_row_dict = { 'pair_index': idx, 'observation_name': _obs_name(pfxobs.original), 'forecast_name': _fx_name(pfxobs.original), 'interval_label': pfxobs.interval_label, 'observation_hash': str(hash( (pfxobs.original.data_object, pfxobs.interval_length, pfxobs.interval_value_type, pfxobs.interval_label))), 'forecast_hash': str(hash( (pfxobs.original.forecast, pfxobs.interval_length, pfxobs.interval_value_type, pfxobs.interval_label))), 'observation_color': _obs_color( pfxobs.interval_length) } value_frames.append(pd.DataFrame(value_frame_dict)) meta_rows.append(meta_row_dict) data = pd.concat(value_frames) metadata = pd.DataFrame(meta_rows) # drop tz info from localized times. GH164 data = data.tz_localize(None) data = data.rename_axis('timestamp') value_cds = ColumnDataSource(data) metadata_cds = ColumnDataSource(metadata) return value_cds, metadata_cds
def _obs_name(fx_obs): # TODO: add code to ensure obs names are unique name = fx_obs.data_object.name if fx_obs.forecast.name == fx_obs.data_object.name: if isinstance(fx_obs.data_object, datamodel.Observation): name += ' Observation' else: name += ' Aggregate' return name def _fx_name(fx_obs): # TODO: add code to ensure fx names are unique name = fx_obs.forecast.name if fx_obs.forecast.name == fx_obs.data_object.name: name += ' Forecast' return name def _obs_color(interval_length): idx = np.searchsorted(OBS_PALETTE_TD_RANGE, interval_length) obs_color = OBS_PALETTE[idx] return obs_color def _boolean_filter_indices_by_pair(value_cds, pair_index): return value_cds.data['pair_index'] == pair_index def _extract_metadata_from_cds(metadata_cds, hash_, hash_key): first_row = np.argwhere(metadata_cds.data[hash_key] == hash_)[0][0] return { 'pair_index': metadata_cds.data['pair_index'][first_row], 'observation_name': metadata_cds.data['observation_name'][first_row], 'forecast_name': metadata_cds.data['forecast_name'][first_row], 'interval_label': metadata_cds.data['interval_label'][first_row], 'observation_color': metadata_cds.data['observation_color'][first_row], }
[docs]def timeseries(timeseries_value_cds, timeseries_meta_cds, start, end, units, timezone='UTC'): """ Timeseries plot of one or more forecasts and observations. Parameters ---------- timeseries_value_cds: bokeh.models.ColumnDataSource ColumnDataSource of timeseries data. See :py:func:`solarforecastarbiter.reports.reoports.figures.construct_timeseries_cds` for format. timeseries_meta_cds: bokeh.models.ColumnDataSource ColumnDataSource of metadata for each Observation Forecast pair. See :py:func:`solarforecastarbiter.reports.reoports.figures.construct_timeseries_cds` for format. start : pandas.Timestamp Report start time end : pandas.Timestamp Report end time timezone : str Timezone consistent with the data in the obs_fx_cds. Returns ------- fig : bokeh.plotting.figure """ # NOQA palette = cycle(PALETTE) fig = figure( sizing_mode='scale_width', plot_width=900, plot_height=300, x_range=(start, end), x_axis_type='datetime', tools='pan,xwheel_zoom,box_zoom,box_select,lasso_select,reset,save', name='timeseries') plotted_objects = 0 for obs_hash in np.unique(timeseries_meta_cds.data['observation_hash']): metadata = _extract_metadata_from_cds( timeseries_meta_cds, obs_hash, 'observation_hash') pair_indices = _boolean_filter_indices_by_pair( timeseries_value_cds, metadata['pair_index']) view = CDSView(source=timeseries_value_cds, filters=[ BooleanFilter(pair_indices) ]) plot_method, plot_kwargs, hover_kwargs = line_or_step( metadata['interval_label']) legend_label = metadata['observation_name'] color = metadata['observation_color'] getattr(fig, plot_method)( x='timestamp', y='observation_values', source=timeseries_value_cds, view=view, color=color, legend_label=legend_label, **plot_kwargs) plotted_objects += 1 for fx_hash in np.unique(timeseries_meta_cds.data['forecast_hash']): metadata = _extract_metadata_from_cds( timeseries_meta_cds, fx_hash, 'forecast_hash') pair_indices = _boolean_filter_indices_by_pair( timeseries_value_cds, metadata['pair_index']) view = CDSView(source=timeseries_value_cds, filters=[BooleanFilter(pair_indices)]) plot_method, plot_kwargs, hover_kwargs = line_or_step( metadata['interval_label']) legend_label = metadata['forecast_name'] color = next(palette) getattr(fig, plot_method)( x='timestamp', y='forecast_values', source=timeseries_value_cds, view=view, color=color, legend_label=legend_label, **plot_kwargs) plotted_objects += 1 fig.legend.location = "top_left" fig.legend.click_policy = "hide" if plotted_objects > 10: fig.legend.label_height = 10 fig.legend.label_text_font_size = '8px' fig.legend.glyph_height = 10 fig.legend.spacing = 1 fig.legend.margin = 0 fig.xaxis.axis_label = f'Time ({timezone})' fig.yaxis.axis_label = f'Data ({units})' return fig
def _get_scatter_limits(cds): extremes = [np.nan] for kind in ('forecast_values', 'observation_values'): arr = np.asarray(cds.data[kind]).astype(float) if len(arr) != 0: extremes.append(np.nanmin(arr)) extremes.append(np.nanmax(arr)) min_ = np.nanmin(extremes) if np.isnan(min_): min_ = -999 max_ = np.nanmax(extremes) if np.isnan(max_): max_ = 999 return min_, max_
[docs]def scatter(timeseries_value_cds, timeseries_meta_cds, units): """ Scatter plot of one or more forecasts and observations. Parameters ---------- timeseries_value_cds: bokeh.models.ColumnDataSource ColumnDataSource of timeseries data. See :py:func:`solarforecastarbiter.reports.reoports.figures.construct_timeseries_cds` for format. timeseries_meta_cds: bokeh.models.ColumnDataSource ColumnDataSource of metadata for each Observation Forecast pair. See :py:func:`solarforecastarbiter.reports.reoports.figures.construct_timeseries_cds` for format. Returns ------- fig : bokeh.plotting.figure """ # NOQA xy_min, xy_max = _get_scatter_limits(timeseries_value_cds) # match_aspect=True does not work well, so these need to be close plot_height = 400 # width will be updated later based on label length plot_width = plot_height + 50 fig = figure( plot_width=plot_width, plot_height=plot_height, match_aspect=True, x_range=Range1d(xy_min, xy_max), y_range=Range1d(xy_min, xy_max), tools='pan,wheel_zoom,box_zoom,box_select,lasso_select,reset,save', name='scatter') kwargs = dict(size=6, line_color=None) palette = cycle(PALETTE) # accumulate labels and plot objects for manual legend scatters_labels = [] for fxhash in np.unique(timeseries_meta_cds.data['forecast_hash']): metadata = _extract_metadata_from_cds( timeseries_meta_cds, fxhash, 'forecast_hash') pair_indices = _boolean_filter_indices_by_pair( timeseries_value_cds, metadata['pair_index']) view = CDSView(source=timeseries_value_cds, filters=[BooleanFilter(pair_indices)]) label = metadata['forecast_name'] r = fig.scatter( x='observation_values', y='forecast_values', source=timeseries_value_cds, view=view, fill_color=next(palette), **kwargs) scatters_labels.append((label, [r])) # manual legend so it can be placed outside the plot area legend = Legend(items=scatters_labels, location='top_center', click_policy='hide') fig.add_layout(legend, 'right') # compute new plot width accounting for legend label text width. # also considered using second figure for legend so it doesn't # distort the first when text length/size changes. unfortunately, # that doesn't work due to bokeh's inability to communicate legend # information across figures. # widest part of the legend max_legend_length = max((len(label) for label, _ in scatters_labels)) px_per_length = 7.75 # found through trial and error fig.plot_width = int(fig.plot_width + max_legend_length * px_per_length) label = f'({units})' fig.xaxis.axis_label = 'Observed ' + label fig.yaxis.axis_label = 'Forecast ' + label return fig
[docs]def construct_metrics_cds(metrics, rename=None): """ Possibly bad assumptions: * metrics contains keys: name, Total, etc. Parameters ---------- metrics : list of datamodel.MetricResults Each metric dict is for a different forecast. Forecast name is specified by the name key. rename : function or None Function of one argument that is applied to each forecast name. Returns ------- cds : bokeh.models.ColumnDataSource ColumnDataSource with indices 'name', 'abbrev', 'category', 'metric', and 'value'. """ if rename: f = rename else: def f(x): return x # NOQA data = [] for metric_result in metrics: for mvalue in metric_result.values: new = { 'name': metric_result.name, 'abbrev': f(metric_result.name), 'category': mvalue.category, 'metric': mvalue.metric, 'value': mvalue.value } if new['category'] == 'date': new['index'] = dt.datetime.strptime( mvalue.index, '%Y-%m-%d') else: new['index'] = mvalue.index data.append(new) df = pd.DataFrame(data, columns=[ 'name', 'abbrev', 'category', 'metric', 'value', 'index' ]) cds = ColumnDataSource(df, name='metrics_cds') cds.data.pop('level_0', None) return cds
def abbreviate(x, limit=3): # might need to add logic to ensure uniqueness # and/or enforce max length using textwrap.shorten components = x.split(' ') out_components = [] for c in components: if len(c) <= limit: out = c elif c.upper() == c: # probably an acronym out = c else: out = f'{c[0:limit]}.' out_components.append(out) return ' '.join(out_components)
[docs]def bar(cds, metric): """ Create a bar graph comparing a single metric across forecasts. Parameters ---------- cds : bokeh.models.ColumnDataSource Metric cds created by :py:func:`solarforecastarbiter.reports.figures.construct_metrics_cds` metric: str The metric to plot. This value should be found in cds['metric']. Returns ------- data_table : bokeh.widgets.DataTable """ # NOQA x_range = np.unique(cds.data['abbrev']) palette = cycle(PALETTE) palette = [next(palette) for _ in x_range] metric_name = datamodel.ALLOWED_METRICS[metric] view = CDSView(source=cds, filters=[ GroupFilter(column_name='metric', group=metric), GroupFilter(column_name='category', group='total') ]) # TODO: add units to title fig = figure(x_range=x_range, width=800, height=200, title=metric_name, name=f'{metric}_total_bar', toolbar_location='above', tools='pan,xwheel_zoom,box_zoom,reset,save') fig.vbar(x='abbrev', top='value', width=0.8, source=cds, view=view, line_color='white', fill_color=factor_cmap('abbrev', palette, factors=x_range)) fig.xgrid.grid_line_color = None tooltips = [ ('Forecast', '@name'), (metric_name, '@value'), ] hover = HoverTool(tooltips=tooltips, mode='vline') # more accurate would be if any single name is longer than each # name's allotted space. For example, never need to rotate labels # if forecasts are named A, B, C, D... but quickly need to rotate # if they have long names. if len(x_range) > 6: # pi/4 looks a lot better, but first tick label flows off chart # and I can't figure out how to add padding in bokeh fig.xaxis.major_label_orientation = np.pi / 2 fig.width = 800 # add more height to figure so that the names can go somewhere. fig.height = 400 fig.add_tools(hover) return fig
def calc_y_start_end(y_min, y_max, pad_factor=1.03): """ Determine y axis start, end. Parameters ---------- y_min : float y_max : float pad_factor : float Number by which to multiply the start, end. Returns ------- start, end : float, float """ # bokeh does not play well with nans y_min = np.nan_to_num(y_min) y_max = np.nan_to_num(y_max) if y_max < 0: # all negative, so set range from y_min to 0 start = y_min end = 0 elif y_min > 0: # all positive, so set range from 0 to y_max start = 0 end = y_max else: start = y_min end = y_max start, end = pad_factor * start, pad_factor * end return start, end
[docs]def bar_subdivisions(cds, category, metric): """ Create bar graphs comparing a single metric across subdivisions of time for multiple forecasts. e.g.:: Fx 1 MAE | |_________________ Fx 2 MAE | |_________________ Year, Month of the year, etc. Parameters ---------- cds : bokeh.models.ColumnDataSource Fields must be kind and the names of the forecasts category : str One of the available metrics grouping categories (e.g., total) Returns ------- figs : dict of figures """ palette = cycle(PALETTE) tools = 'pan,xwheel_zoom,box_zoom,reset,save' fig_kwargs = dict(tools=tools, toolbar_location='above') figs = {} width = 0.8 human_category = datamodel.ALLOWED_CATEGORIES[category] metric_name = datamodel.ALLOWED_DETERMINISTIC_METRICS[metric] fig_kwargs['x_axis_label'] = human_category fig_kwargs['y_axis_label'] = metric_name filter_ = ((np.asarray(cds.data['category']) == category) & (np.asarray(cds.data['metric']) == metric)) # Special handling for x-axis with dates if category == 'date': fig_kwargs['x_axis_type'] = 'datetime' width = width * pd.Timedelta(days=1) fig_kwargs['x_range'] = DataRange1d() elif category == 'month': fig_kwargs['x_range'] = FactorRange( factors=calendar.month_abbr[1:]) elif category == 'weekday': fig_kwargs['x_range'] = FactorRange( factors=calendar.day_abbr[0:]) elif category == 'hour': fig_kwargs['x_range'] = FactorRange( factors=[str(i) for i in range(25)]) else: fig_kwargs['x_range'] = FactorRange( factors=np.unique(cds.data['index'][filter_])) y_data = np.asarray(cds.data['value'])[filter_] if len(y_data) == 0: start, end = None, None else: y_min = np.nanmin(y_data) y_max = np.nanmax(y_data) start, end = calc_y_start_end(y_min, y_max) fig_kwargs['y_range'] = DataRange1d(start=start, end=end) unique_names = np.unique(np.asarray(cds.data['name'])[filter_]) for name in unique_names: view = CDSView(source=cds, filters=[ GroupFilter(column_name='metric', group=metric), GroupFilter(column_name='category', group=category), GroupFilter(column_name='name', group=name) ]) # Create figure title = name + ' ' + metric_name fig = figure(width=800, height=200, title=title, name=f'{category}_{metric}_{name}', **fig_kwargs) # Custom bar alignment if category == 'hour': # Center bars between hour ticks x = dodge('index', 0.5, range=fig.x_range) else: x = 'index' fig.vbar(x=x, top='value', width=width, source=cds, view=view, line_color='white', fill_color=next(palette)) # axes parameters fig.xgrid.grid_line_color = None fig.xaxis.minor_tick_line_color = None # Hover tool and format specific changes if category == 'date': # Datetime x-axis formatter = DatetimeTickFormatter(days='%Y-%m-%d') fig.xaxis.formatter = formatter tooltips = [ ('Forecast', '@name'), (human_category, '@index{%F}'), (metric_name, '@value'), ] hover_kwargs = dict(tooltips=tooltips, formatters={'index': 'datetime'}) elif category == 'month' or category == 'weekday': # Categorical x-axis formatter = CategoricalTickFormatter() fig.xaxis.formatter = formatter tooltips = [ ('Forecast', '@name'), (human_category, '@index'), (metric_name, '@value'), ] hover_kwargs = dict(tooltips=tooltips) else: # Numerical x-axis tooltips = [ ('Forecast', '@name'), (human_category, '@index'), (metric_name, '@value'), ] hover_kwargs = dict(tooltips=tooltips) hover = HoverTool(mode='vline', **hover_kwargs) fig.add_tools(hover) figs[name] = fig return figs
def nested_bar(): raise NotImplementedError def joint_distribution(): raise NotImplementedError def marginal_distribution(): raise NotImplementedError def taylor_diagram(): raise NotImplementedError def probabilistic_timeseries(): raise NotImplementedError def reliability_diagram(): raise NotImplementedError def rank_histogram(): raise NotImplementedError @contextmanager def _make_webdriver(): """Necessary until Bokeh 2.0 when using chrome/firefox drivers will be preferred and to avoid zombie phantomjs processes for now""" from selenium import webdriver with warnings.catch_warnings(): warnings.simplefilter('ignore') try: driver = webdriver.PhantomJS() except Exception: yield None else: yield driver driver.quit()
[docs]def output_svg(fig, driver=None): """ Generates an SVG from the Bokeh figure. Errors in the process are logged and an SVG with error text is returned. Parameters ---------- fig : bokeh.plotting.Figure driver : selenium.webdriver.remote.webdriver.WebDriver, default None Web driver to use to render SVG figures. With bokeh<2.0 this defaults to trying to use phantomjs. Returns ------- svg : str """ fig.output_backend = 'svg' try: svg = get_svgs(fig, driver=driver)[0] except Exception: logger.error('Could not generate SVG for figure %s', getattr(fig, 'name', 'unnamed')) svg = ( '<svg width="100%" height="100%">' '<text x="50" y="50" class="alert alert-error">' 'Unable to generate SVG plot.' '</text>' '</svg>') return svg
[docs]def raw_report_plots(report, metrics): """Create a RawReportPlots object from the metrics of a report. Parameters ---------- report: :py:class:`solarforecastarbiter.datamodel.Report` metrics: tuple of :py:class:`solarforecastarbiter.datamodel.MetricResult` Returns ------- :py:class:`solarforecastarbiter.datamodel.RawReportPlots` """ cds = construct_metrics_cds(metrics, rename=abbreviate) # Create initial bar figures figure_dict = {} # Components for other metrics for category in report.report_parameters.categories: for metric in report.report_parameters.metrics: if category == 'total': fig = bar(cds, metric) figure_dict[f'total::{metric}::all'] = fig else: figs = bar_subdivisions(cds, category, metric) for name, fig in figs.items(): figure_dict[f'{category}::{metric}::{name}'] = fig script, divs = components(figure_dict) mplots = [] with _make_webdriver() as driver: for k, v in divs.items(): cat, met, name = k.split('::', 2) fig = figure_dict[k] svg = output_svg(fig, driver=driver) mplots.append(datamodel.BokehReportFigure( name=name, category=cat, metric=met, div=v, svg=svg, figure_type='bar')) out = datamodel.RawReportPlots(bokeh_version=bokeh_version, script=script, figures=tuple(mplots)) return out
[docs]def timeseries_plots(report): """Return the bokeh components (script and div element) for timeseries and scatter plots of the processed forecasts and observations. Parameters ---------- report: :py:class:`solarforecastarbiter.datamodel.Report` Returns ------- script: str A script element to insert into an html template div: str A div element to insert into an html template. """ value_cds, meta_cds = construct_timeseries_cds(report) pfxobs = report.raw_report.processed_forecasts_observations units = pfxobs[0].original.forecast.units tfig = timeseries(value_cds, meta_cds, report.report_parameters.start, report.report_parameters.end, units, report.raw_report.timezone) sfig = scatter(value_cds, meta_cds, units) layout = gridplot((tfig, sfig), ncols=1) script, div = components(layout) return script, div