# Source code for solarforecastarbiter.reports.template

"""
Inserts metadata and figures into the report template.
"""
import base64
import json
import logging
from pathlib import Path
import re
import subprocess
import tempfile


from bokeh import __version__ as bokeh_version
from jinja2 import Environment, PackageLoader, select_autoescape, ChoiceLoader
from jinja2.runtime import Undefined
from plotly import __version__ as plotly_version


from solarforecastarbiter import datamodel
from solarforecastarbiter.reports.figures import plotly_figures


logger = logging.getLogger(__name__)


def build_metrics_json(report):
    """Serializes the non-summary metrics of the report to a JSON string.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`

    Returns
    -------
    str
        The json (``orient="records"``) representing the report metrics,
        excluding any metric flagged with ``is_summary``. The string will
        represent an empty json array (``"[]"``) if the report has no
        ``raw_report`` attribute or if that attribute is None.
    """
    # Supply a default so an object lacking the attribute is treated the
    # same as one whose raw_report has not been computed.
    raw_report = getattr(report, 'raw_report', None)
    if raw_report is None:
        return "[]"

    # Metrics without an is_summary attribute count as regular metrics.
    metrics = [metric for metric in raw_report.metrics
               if not getattr(metric, 'is_summary', False)]
    df = plotly_figures.construct_metrics_dataframe(
        metrics, rename=plotly_figures.abbreviate)
    return df.to_json(orient="records")


def build_summary_stats_json(report):
    """Serializes the summary statistics of the report to a JSON string.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`

    Returns
    -------
    str
        The json (``orient="records"``) representing the summary
        statistics, i.e. the metrics flagged with ``is_summary``. Will be
        a string representing an empty json array (``"[]"``) if the report
        has no ``raw_report`` attribute or if that attribute is None.

    Raises
    ------
    ValueError
        If report.raw_report is populated but no
        report.raw_report.metrics have `is_summary == True`
        indicating that the report was made without
        summary statistics.
    """
    # Supply a default so an object lacking the attribute is treated the
    # same as one whose raw_report has not been computed.
    raw_report = getattr(report, 'raw_report', None)
    if raw_report is None:
        return "[]"

    # Metrics without an is_summary attribute are not summary statistics.
    summaries = [metric for metric in raw_report.metrics
                 if getattr(metric, 'is_summary', False)]
    df = plotly_figures.construct_metrics_dataframe(
        summaries, rename=plotly_figures.abbreviate)
    if df.empty:
        raise ValueError('No summary statistics in report.')
    return df.to_json(orient="records")


def build_metadata_json(report):
    """Creates a JSON array of ProcessedForecastObservations parameters
    in the report.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`

    Returns
    -------
    str
        The JSON representing the report forecast-observation metadata.
        Float NaN values are emitted as ``null``; string values are left
        untouched. ``"[]"`` is returned if the report has no
        ``raw_report`` attribute or if that attribute is None.
    """
    raw_report = getattr(report, 'raw_report', None)
    if raw_report is None:
        return "[]"

    # keys removed from the forecast, observation and aggregate dicts
    drop_keys = {
        '__blurb__', 'site', 'aggregate',
    }
    # keys retained from the processed forecast/observation pair itself
    pair_keys = {
        'name', 'interval_value_type', 'interval_length',
        'interval_label', 'normalization_factor',
        'uncertainty', 'cost',
    }

    def _nan_to_none(obj):
        # json.dumps writes float NaN as the bare token NaN, which is not
        # valid JSON. Replace the values themselves instead of editing the
        # serialized text so that strings containing "NaN" are preserved.
        if isinstance(obj, float) and obj != obj:
            return None
        if isinstance(obj, dict):
            return {k: _nan_to_none(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [_nan_to_none(v) for v in obj]
        return obj

    def _without_dropped(params):
        return {k: v for k, v in params.items() if k not in drop_keys}

    def _process_forecast(fx):
        if fx is None:
            return None
        out = _without_dropped(fx.to_dict())
        if isinstance(fx, datamodel.ProbabilisticForecast):
            # keep only the constant value of each member forecast
            out['constant_values'] = [
                cdf.constant_value for cdf in fx.constant_values]
        return out

    out = []
    for pfxobs in raw_report.processed_forecasts_observations:
        # blank out `original` before to_dict(); the original pair is
        # serialized piece by piece below
        minp = pfxobs.replace(original=None)
        thisout = {k: v for k, v in minp.to_dict().items()
                   if k in pair_keys}

        original = pfxobs.original
        thisout['forecast'] = _process_forecast(original.forecast)
        thisout['reference_forecast'] = _process_forecast(
            original.reference_forecast)
        thisout['observation'] = None
        thisout['aggregate'] = None
        if hasattr(original, 'observation'):
            thisout['observation'] = _without_dropped(
                original.observation.to_dict())
        elif hasattr(original, 'aggregate'):
            aggregate = _without_dropped(original.aggregate.to_dict())
            # replace each nested observation dict with just its id
            obs = []
            for aggobs in original.aggregate.observations:
                obsd = aggobs.to_dict()
                obsd['observation_id'] = obsd.pop('observation')[
                    'observation_id']
                obs.append(obsd)
            aggregate['observations'] = obs
            thisout['aggregate'] = aggregate
        out.append(thisout)
    return json.dumps(_nan_to_none(out))


def _get_render_kwargs(report, dash_url, with_timeseries):
    """Builds the keyword arguments used to render a jinja2 report
    template.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`
    dash_url: str
        URL of the Solar Forecast arbiter dashboard to use when building links.
    with_timeseries: bool
        Whether or not to include timeseries plots. If an error occurs when
        trying to generate timeseries plots, the `timeseries_spec`,
        `scatter_spec`, and `timeseries_prob_spec` arguments will not be
        defined.

    Returns
    -------
    kwargs: dict
        Dictionary of template variables to unpack as key word arguments when
        rendering.
    """
    # The macros render a column for every allowed summary statistic, so
    # choose the statistic set explicitly to avoid unnecessary blanks.
    #
    # The report must be complete and every processed forecast must be an
    # event forecast to use the event statistics. Inspecting the processed
    # pairs rather than report_parameters.object_pairs avoids loading or
    # shuffling forecasts when working with a raw api response on the
    # dashboard without solarforecastarbiter.io.api's process_report_dict.
    # See issue 694 for context.
    only_event_forecasts = report.status == "complete" and all(
        type(pair.original.forecast) is datamodel.EventForecast
        for pair in report.raw_report.processed_forecasts_observations
    )
    if only_event_forecasts:
        statistics = datamodel.ALLOWED_EVENT_SUMMARY_STATISTICS
    else:
        statistics = datamodel.ALLOWED_DETERMINISTIC_SUMMARY_STATISTICS

    # The macros only render columns/plots for metrics that actually
    # exist, so categories and metrics need no such narrowing.
    render_kwargs = {
        'human_categories': datamodel.ALLOWED_CATEGORIES,
        'human_metrics': datamodel.ALLOWED_METRICS,
        'human_statistics': statistics,
        'report': report,
        'category_blurbs': datamodel.CATEGORY_BLURBS,
        'dash_url': dash_url,
        'metrics_json': build_metrics_json(report),
        'metadata_json': build_metadata_json(report),
        'templating_messages': [],
    }

    # Use the plotting library versions recorded when the plots were
    # generated; if plot generation failed, fall back to the current
    # versions.
    plots = getattr(report.raw_report, 'plots', None)
    render_kwargs['bokeh_version'] = (
        getattr(plots, 'bokeh_version', None) or bokeh_version)
    render_kwargs['plotly_version'] = (
        getattr(plots, 'plotly_version', None) or plotly_version)

    try:
        summary_stats = build_summary_stats_json(report)
    except ValueError:
        render_kwargs['templating_messages'].append(
            'No data summary statistics were calculated with this report.')
        summary_stats = '[]'
    render_kwargs['summary_stats'] = summary_stats

    if not with_timeseries:
        return render_kwargs

    try:
        specs = plotly_figures.timeseries_plots(report)
    except Exception:
        logger.exception(
            'Failed to make Plotly items for timeseries and scatterplot')
        return render_kwargs

    # the first three items are optional plot specs; only define the
    # template variable when the spec exists
    spec_names = ('timeseries_spec', 'scatter_spec', 'timeseries_prob_spec')
    for position, spec_name in enumerate(spec_names):
        spec = specs[position]
        if spec is not None:
            render_kwargs[spec_name] = spec
    render_kwargs['includes_distribution'] = specs[3]
    return render_kwargs


def _pretty_json(value):
    """Jinja2 filter that dumps value as JSON indented by four spaces.

    Undefined template values are returned unchanged.
    """
    if not isinstance(value, Undefined):
        return json.dumps(value, indent=4, separators=(',', ':'))
    return value  # pragma: no cover


def _figure_name_filter(value):
    """Replace characters that may cause problems for html/javascript ids.

    Undefined template values are returned unchanged.
    """
    if isinstance(value, Undefined):
        return value
    # No replacement text contains a character that is itself replaced,
    # so a single translation pass gives the same result as applying the
    # replacements one after another.
    substitutions = str.maketrans({
        '^': '-',
        ' ': '-',
        '.': 'dot',
        '%': 'percent',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq',
        '(': 'lp',
        ')': 'rp',
        '/': 'fsl',
        '\\': 'bsl',
    })
    translated = value.translate(substitutions)
    # anything left that is not a word character or a hyphen
    return re.sub('[^\\w-]', 'special', translated)


def _unique_flags_filter(proc_fxobs_list, before_resample):
    # use a dict to preserve order and guarantee uniqueness of keys
    names = {}
    for proc_fxobs in proc_fxobs_list:
        for val_result in proc_fxobs.validation_results:
            if val_result.before_resample == before_resample:
                names[val_result.flag] = None
    unique_names = list(names.keys())
    return unique_names


def get_template_and_kwargs(report, dash_url, with_timeseries, body_only):
    """Returns the jinja2 Template object and a dict of template variables
    for the report. The template is selected from the report status:
    complete, failed or pending.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`
    dash_url: str
        URL of the Solar Forecast arbiter dashboard to use when building links.
    with_timeseries: bool
        Whether or not to include timeseries plots.
    body_only: bool
        When True, returns a div for injecting into another template,
        otherwise returns a full html document with the required
        <html> and <head> tags.

    Returns
    -------
    template: jinja2.environment.Template
    kwargs: dict
        Dictionary of template variables to use as keyword arguments to
        template.render().

    Raises
    ------
    ValueError
        If the report status is not complete, failed or pending.
    """
    html_env = Environment(
        loader=ChoiceLoader([
            PackageLoader('solarforecastarbiter.reports', 'templates/html'),
            PackageLoader('solarforecastarbiter.reports', 'templates'),
        ]),
        autoescape=select_autoescape(['html', 'xml']),
        lstrip_blocks=True,
        trim_blocks=True,
        extensions=['jinja2.ext.do']
    )
    html_env.filters.update({
        'pretty_json': _pretty_json,
        'figure_name_filter': _figure_name_filter,
        'unique_flags_filter': _unique_flags_filter,
    })
    kwargs = _get_render_kwargs(report, dash_url, with_timeseries)

    status_templates = (
        ('complete', 'body.html'),
        ('failed', 'failure.html'),
        ('pending', 'pending.html'),
    )
    template_name = next(
        (name for status, name in status_templates
         if report.status == status),
        None)
    if template_name is None:
        raise ValueError(f'Unknown status for report {report.status}')
    template = html_env.get_template(template_name)

    # body_only selects the base template used for embedding
    base_name = 'empty_base.html' if body_only else 'base.html'
    kwargs['base_template'] = html_env.get_template(base_name)
    return template, kwargs
def render_html(report, dash_url=datamodel.DASH_URL,
                with_timeseries=True, body_only=False):
    """Create full html file.

    The Solar Forecast Arbiter dashboard will likely use its own
    templates for rendering the full html.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`
    dash_url: str
        URL of the Solar Forecast arbiter dashboard to use when building links.
    with_timeseries: bool
        Whether or not to include timeseries plots.
    body_only: bool
        When True, returns a div for injecting into another template,
        otherwise returns a full html document with the required
        <html> and <head> tags.

    Returns
    -------
    str
        The rendered html report
    """
    template, render_kwargs = get_template_and_kwargs(
        report, dash_url, with_timeseries, body_only)
    return template.render(**render_kwargs)
def _link_filter(value):
    """Convert html href markup to tex href markup.

    Every ``<a ... href="url">text</a>`` anchor in value is rewritten as
    ``\\href{url}{text}``. Text outside of anchors is left unchanged and
    Undefined template values are returned as is.
    """
    if isinstance(value, Undefined):  # pragma: no cover
        return value
    # re.sub converts each anchor; a single re.search would only convert
    # the first one and leave the rest as raw html.
    return re.sub(
        """<a\\s+(?:[^>]*?\\s+)?href=(["'])(.*?)(["'])>(.*?)<\\/a>""",
        lambda match: (
            "\\href{" + match.group(2) + "}{" + match.group(4) + "}"),
        value,
        flags=re.DOTALL)


def _html_to_tex(value):
    """Translate the html markup found in value into LaTeX markup.

    Paragraph, emphasis, code, bold and ordered list tags are replaced
    with LaTeX equivalents, ``<=``, ``%`` and ``W/m^2`` are rewritten for
    LaTeX, and any remaining anchor tags are removed while their text is
    kept. Undefined template values are returned as is.
    """
    if isinstance(value, Undefined):
        return value
    value = (value
             .replace('<p>', '')
             .replace('</p>', '\n')
             .replace('<em>', '\\emph{')
             .replace('</em>', '}')
             .replace('<code>', '\\verb|')
             .replace('</code>', '|')
             .replace('<b>', '\\textbf{')
             .replace('</b>', '}')
             .replace('<ol>', '\\begin{enumerate}')
             .replace('</ol>', '\\end{enumerate}')
             .replace('<li>', '\\item ')
             .replace('</li>', '\n')
             .replace('</a>', '')
             .replace('<=', '$\\leq$')
             .replace("%", "\\%")
             .replace('W/m^2', '$W/m^2$')
             )
    # Remove only the opening anchor tag itself. A greedy '.*' would also
    # delete all text between '<a' and the last '>' on the line, and
    # without the word boundary tags such as <abbr> would match too.
    value = re.sub('<a\\b[^>]*>', '', value)
    return value
def render_pdf(report, dash_url, max_runs=5):
    """
    Create a PDF report using LaTeX.

    The ``base.tex`` template is rendered into a temporary directory, the
    report figures are written next to it, and pdflatex is run on the
    result.

    Parameters
    ----------
    report: :py:class:`solarforecastarbiter.datamodel.Report`
    dash_url: str
        URL of the Solar Forecast Arbiter dashboard to use when building links.
    max_runs: int, default 5
        Maximum number of times to run pdflatex

    Returns
    -------
    bytes
        The rendered PDF report

    Notes
    -----
    This code was inspired by the latex package available at
    https://github.com/mbr/latex/ under the following license:

    Copyright (c) 2015, Marc Brinkmann
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.

    * Neither the name of latex nor the names of its contributors may be
      used to endorse or promote products derived from this software
      without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
    OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
    AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
    WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
    """
    # jinja2 delimiters are remapped to \BLOCK{}, \VAR{} and \#{} for the
    # tex templates
    latex_env = Environment(
        loader=ChoiceLoader([
            PackageLoader('solarforecastarbiter.reports', 'templates/pdf'),
            PackageLoader('solarforecastarbiter.reports', 'templates'),
        ]),
        autoescape=False,
        lstrip_blocks=True,
        trim_blocks=True,
        block_start_string='\\BLOCK{',
        block_end_string='}',
        variable_start_string='\\VAR{',
        variable_end_string='}',
        comment_start_string='\\#{',
        comment_end_string='}',
        line_statement_prefix='%-',
        line_comment_prefix='%#'
    )
    latex_env.filters.update({
        'html_to_tex': _html_to_tex,
        'link_filter': _link_filter,
        'pretty_json': _pretty_json,
        'unique_flags_filter': _unique_flags_filter,
    })
    # with_timeseries=False: no timeseries specs are built for the PDF
    render_kwargs = _get_render_kwargs(report, dash_url, False)
    with tempfile.TemporaryDirectory() as workdir_name:
        workdir = Path(workdir_name)
        logfile, auxfile = _prepare_latex_support_files(
            workdir, latex_env, render_kwargs)
        _save_figures_to_pdf(workdir, report)
        _compile_files_into_pdf(workdir, logfile, auxfile, max_runs)
        # read the result before the temporary directory is removed
        pdf_bytes = (workdir / 'out.pdf').read_bytes()
    return pdf_bytes
def _prepare_latex_support_files(tmpdir, env, kwargs):
    """Render ``base.tex`` with kwargs into ``tmpdir/out.tex``.

    Returns the paths ``(tmpdir/out.log, tmpdir/out.aux)``; neither file
    is created here.
    """
    rendered = env.get_template('base.tex').render(**kwargs)
    (tmpdir / 'out.tex').write_text(rendered)
    return tmpdir / 'out.log', tmpdir / 'out.aux'


def _save_figures_to_pdf(tmpdir, report):
    """Write every figure in report.raw_report.plots.figures to
    ``tmpdir/figs`` as a pdf file named from its category, metric and
    name.
    """
    figdir = tmpdir / 'figs'
    figdir.mkdir()
    # handle characters that will cause problems for tex; applied in order
    replacements = (
        ('^', '-'),
        (' ', '+'),
        ('_', '+'),
        ('<=', 'lte'),
        ('%', 'pct'),
        ('.', ''),
        ('/', '-'),
    )
    for fig in report.raw_report.plots.figures:
        stem = '+'.join((fig.category, fig.metric, fig.name))
        for old, new in replacements:
            stem = stem.replace(old, new)
        # fig.pdf holds the figure as Ascii85 encoded data
        (figdir / (stem + '.pdf')).write_bytes(base64.a85decode(fig.pdf))


def _compile_files_into_pdf(tmpdir, logfile, auxfile, max_runs):
    """Run pdflatex on ``out.tex`` in tmpdir until the aux file is the
    same after two consecutive runs.

    Raises
    ------
    subprocess.CalledProcessError
        If pdflatex exits with an error; the log file content is logged
        first when it can be read.
    RuntimeError
        If the aux file has not settled after max_runs runs.
    """
    command = (
        'pdflatex',
        '-interaction=batchmode',
        '-halt-on-error',
        '-no-shell-escape',
        '-file-line-error',
        'out.tex'
    )
    previous_aux = 'nothing to see here'
    runs_done = 0
    # run pdflatex until it settles
    while runs_done < max_runs:
        try:
            subprocess.run(command, check=True, cwd=str(tmpdir.absolute()))
        except subprocess.CalledProcessError:
            try:
                logger.exception(logfile.read_text())
            except Exception:
                logger.exception('Pdflatex failed and so did reading log')
            raise
        current_aux = auxfile.read_text()
        if current_aux == previous_aux:
            return
        previous_aux = current_aux
        runs_done += 1
    raise RuntimeError(
        f'PDF generation unstable after {max_runs} runs')