pyFTS/pyFTS/common/Util.py

"""
Common facilities for pyFTS
"""

import time
import matplotlib.pyplot as plt
import dill
import numpy as np
import pandas as pd
import matplotlib.cm as cmx
import matplotlib.colors as pltcolors
from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS.common import Transformations


def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
                               save=False, file=None, tam=[20, 5], resolution=None,
                               cmap='Blues', linewidth=1.5):
    """
    Plot the ahead forecasts of several models, by interval and/or by probability distribution,
    together with the original data

    :param original: Original time series data (list)
    :param models: List of models to compare
    :param colors: List of line colors, indexed like models
    :param distributions: List of booleans, indexed like models; True to plot the distribution of that model
    :param time_from: index of the data point where the ahead forecasting starts
    :param time_to: number of steps ahead to forecast
    :param intervals: True to plot the interval forecasts of the models that provide them
    :param save: Save the picture on file
    :param file: Filename to save the picture
    :param tam: Size of the picture
    :param resolution: resolution passed to forecast_ahead_distribution and used as the height of the
                       density rectangles; defaults to 1/100 of the range of original
    :param cmap: Color map to be used on distribution plot
    :param linewidth: base line width; upper bounds and the original data are drawn 1.5 times thicker
    :return:
    """
    fig = plt.figure(figsize=tam)
    ax = fig.add_subplot(111)

    cm = plt.get_cmap(cmap)

    if resolution is None:
        resolution = (max(original) - min(original)) / 100

    mi = []
    ma = []

    for count, fts in enumerate(models):
        if fts.has_probability_forecasting and distributions[count]:
            density = fts.forecast_ahead_distribution(original[time_from - fts.order:time_from], time_to,
                                                      resolution=resolution)
            plot_density_rectange(ax, cm, density, fig, resolution, time_from, time_to)

        if fts.has_interval_forecasting and intervals:
            forecasts = fts.forecast_ahead_interval(original[time_from - fts.order:time_from], time_to)
            lower = [kk[0] for kk in forecasts]
            upper = [kk[1] for kk in forecasts]
            mi.append(min(lower))
            ma.append(max(upper))
            # Left-pad with None so the bounds are drawn at their position on the time axis
            padding = [None] * (time_from - fts.order)
            ax.plot(padding + lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
            ax.plot(padding + upper, color=colors[count], linewidth=linewidth*1.5)

    ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
    handles0, labels0 = ax.get_legend_handles_labels()
    if True in distributions:
        lgd = ax.legend(handles0, labels0, loc=2)
    else:
        # No colorbar is expected here, so the legend is anchored outside the axes
        lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    # When no interval was plotted there are no bounds collected; fall back to the range of the data
    # instead of failing on min()/max() of an empty list
    _mi = min(mi) if mi else min(original)
    _ma = max(ma) if ma else max(original)

    # 10% margin that always widens the range, whatever the sign of the limits
    _mi *= 1.1 if _mi < 0 else 0.9
    _ma *= 0.9 if _ma < 0 else 1.1

    ax.set_ylim([_mi, _ma])
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])

    show_and_save_image(fig, file, save, lgd=lgd)


def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to):
    """
    Auxiliary function of plot_compared_intervals_ahead: adds one colored rectangle per cell of density

    :param ax: matplotlib axis that receives the rectangles
    :param cmap: colormap set on the patch collection
    :param density: table read as density[y][x], with x taken from density.index and y from density.columns
    :param fig: matplotlib figure that receives the colorbar
    :param resolution: height of each rectangle
    :param time_from: offset added to every index value to obtain the x position
    :param time_to: not used by this function
    """
    from matplotlib.collections import PatchCollection
    from matplotlib.patches import Rectangle

    cells = [(step, level) for step in density.index for level in density.columns]

    boxes = [Rectangle((time_from + step, level), 1, resolution, fill=True, lw=0)
             for step, level in cells]
    # Densities are multiplied by 5 before being mapped on the [0, 1] color range
    shades = [density[level][step] * 5 for step, level in cells]

    collection = PatchCollection(patches=boxes, match_original=True)
    collection.set_clim([0, 1])
    collection.set_cmap(cmap)
    collection.set_array(np.array(shades))
    ax.add_collection(collection)

    colorbar = fig.colorbar(collection, ax=ax)
    colorbar.set_label('Density')


def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
    """
    Draw several probability distributions on a new figure and add a legend

    :param pmfs: distributions to draw; each one is drawn through its plot(ax, color=...) method
    :param lcolors: colors, indexed by the position of each distribution in pmfs
    :param tam: size of the figure
    """
    ax = plt.figure(figsize=tam).add_subplot(111)

    for position, distribution in enumerate(pmfs):
        distribution.plot(ax, color=lcolors[position])

    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)

def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=None):
    '''
    Draw forecasted ProbabilityDistribution objects on a matplotlib axis, one column of rectangles each

    :param ax: matplotlib axis
    :param cmap: colormap set on the patch collection
    :param probabilitydist: list of ProbabilityDistribution objects; the k-th one is drawn at x = time_from + k
    :param fig: matplotlib figure that receives the colorbar
    :param time_from: starting time (on x axis) to begin the plots
    :param reference_data: optional sequence; when given, reference_data[time_from + k] is added to the
                           y position of every bin of the k-th distribution
    :return:
    '''
    from matplotlib.collections import PatchCollection
    from matplotlib.patches import Rectangle

    boxes = []
    densities = []
    for offset, dist in enumerate(probabilitydist):
        column = time_from + offset
        shift = 0.0 if reference_data is None else reference_data[column]

        for level in dist.bins:
            boxes.append(Rectangle((column, level + shift), 1, dist.resolution, fill=True, lw=0))
            densities.append(dist.density(level))

    # The densities go through Transformations.Scale before being used as color values
    densities = Transformations.Scale().apply(densities)

    collection = PatchCollection(patches=boxes, match_original=True)
    collection.set_clim([0, 1])
    collection.set_cmap(cmap)
    collection.set_array(np.array(densities))
    ax.add_collection(collection)

    colorbar = fig.colorbar(collection, ax=ax)
    colorbar.set_label('Density')


def plot_distribution2(probabilitydist, data, **kwargs):
    '''
    Plot forecasted distributions over the time (x-axis) as nested quantile bands

    :param probabilitydist: the forecasted probability distributions to plot
    :param data: the original test sample
    :keyword start_at: the time index (inside of data) to start to plot the probability distributions
    :keyword order: added to start_at as (order - 1) to find the first plotted index, the default value is 1
    :keyword ax: a matplotlib axis. If no value was provided a new axis is created.
    :keyword cmap: a matplotlib colormap name, the default value is 'Blues'
    :keyword quantiles: the list of quantiles intervals to plot, e. g. [.05, .25, .75, .95]
    :keyword median: a boolean value indicating if the median value will be plot.
    '''
    import matplotlib.colorbar as cbar
    import matplotlib.cm as cm

    order = kwargs.get('order', 1)

    ax = kwargs.get('ax', None)
    if ax is None:
        _, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

    cmap = plt.get_cmap(kwargs.get('cmap', 'Blues'))

    start_at = kwargs.get('start_at', 0) + order - 1

    # One x position for the starting data point plus one per distribution
    x = [start_at + step for step in range(len(probabilitydist) + 1)]

    qt = kwargs.get('quantiles', None)
    if qt is None:
        qt = [.01] + [round(q, 2) for q in np.arange(.05, 1., .05)] + [.99]

    lq = len(qt)

    normal = plt.Normalize(min(qt), max(qt))
    scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)

    # Pair the quantiles from the outside in: (first, last), (second, second to last), ...
    for band in range(1, lq // 2 + 1):
        bounds = [[data[start_at], data[start_at]]]
        bounds.extend(dist.quantile([qt[band - 1], qt[-band]]) for dist in probabilitydist)

        ax.fill_between(x, [pair[0] for pair in bounds], [pair[1] for pair in bounds],
                        facecolor=scalarMap.to_rgba(band / lq))

    if kwargs.get('median', True):
        medians = [data[start_at]]
        medians.extend(dist.quantile(.5)[0] for dist in probabilitydist)

        ax.plot(x, medians, color='red', label='Median')

    cax, _ = cbar.make_axes(ax)
    colorbar = cbar.ColorbarBase(cax, cmap=cmap, norm=normal)
    colorbar.set_label('Density')


def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
    '''
    Plot forecasted intervals on matplotlib

    :param axis: matplotlib axis
    :param intervals: list of forecasted intervals, each one indexable as [lower, upper]
    :param order: order of the model that create the forecasts; the bounds are shifted order + 1
                  positions to the right on the x axis
    :param label: figure label
    :param color: matplotlib color name
    :param typeonlegend: if True, " (Interval)" is appended to the label
    :param ls: matplotlib line style
    :param linewidth: matplotlib width
    :return: [minimum, maximum] of the plotted bounds, widened by a 5% margin
    '''
    lower = [kk[0] for kk in intervals]
    upper = [kk[1] for kk in intervals]

    # The margin must widen the range whatever the sign of the limits: a negative minimum
    # has to be multiplied by more than 1 and a negative maximum by less than 1
    mi = min(lower)
    mi *= 1.05 if mi < 0 else 0.95
    ma = max(upper)
    ma *= 0.95 if ma < 0 else 1.05

    # Left-pad with None so the first forecast is drawn at x = order + 1
    padding = [None] * (order + 1)
    lower = padding + lower
    upper = padding + upper

    if typeonlegend:
        label += " (Interval)"
    axis.plot(lower, color=color, label=label, ls=ls, linewidth=linewidth)
    axis.plot(upper, color=color, ls=ls, linewidth=linewidth)
    return [mi, ma]


def plot_interval2(intervals, data, **kwargs):
    '''
    Plot forecasted intervals on matplotlib

    :param intervals: list of forecasted intervals, each one indexable as [lower, upper];
                      the list is not modified
    :param data: the original test sample
    :keyword start_at: the time index (inside of data) to start to plot the intervals, the default value is 1
    :keyword ax: a matplotlib axis. If no value was provided a new axis is created.
    :keyword label: figure label
    :keyword color: matplotlib color name
    :keyword typeonlegend: if True, " (Interval)" is appended to the label
    :keyword ls: matplotlib line style
    :keyword linewidth: matplotlib width
    '''
    start_at = kwargs.get('start_at', 1)

    ax = kwargs.get('ax', None)
    if ax is None:
        _, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

    # Build the plotted series on a new list: inserting into the caller's list would
    # corrupt it and make a second call with the same forecasts plot wrong data
    series = [[None, None]] * (start_at - 1)
    series.extend(intervals)

    # Zero-width interval on the observed value at start_at.
    # NOTE(review): it lands at index start_at, i.e. after the first forecasted interval
    # (which sits at start_at - 1) - confirm this placement is intended
    series.insert(start_at, [data[start_at], data[start_at]])

    lower = [kk[0] for kk in series]
    upper = [kk[1] for kk in series]

    typeonlegend = kwargs.get('typeonlegend', False)
    color = kwargs.get('color', 'red')
    label = kwargs.get('label', '')
    linewidth = kwargs.get('linewidth', 1)
    ls = kwargs.get('ls', '-')

    if typeonlegend:
        label += " (Interval)"
    ax.plot(lower, color=color, label=label, ls=ls, linewidth=linewidth)
    ax.plot(upper, color=color, ls=ls, linewidth=linewidth)


def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
    '''
    Plot the FLRG rules of a FTS model on matplotlib axes and show the figure

    :param model: FTS model
    :param size: figure size
    :param axis: matplotlib axis. When given, no figure is created and every rule is drawn on it
                 (rules_by_axis and columns are then ignored)
    :param rules_by_axis: number of rules plotted on each axis of the grid; None plots every rule
                          on a single axis
    :param columns: number of columns of the grid of axes
    :return:
    '''
    if model.is_high_order:
        lhs_keys = list(model.flrgs.keys())
    else:
        lhs_keys = list(model.partitioner.ordered_sets)

    if axis is not None or rules_by_axis is None:
        # Single axis: every rule gets its own x slot on it
        if axis is None:
            _, axis = plt.subplots(nrows=1, ncols=1, figsize=size)
        grid = None
        per_axis = max(len(lhs_keys), 1)
        draw_sets_on_axis(axis, model, size)
    else:
        per_axis = rules_by_axis
        # Sized from the keys that are actually iterated below, so every panel index exists
        rows = ((len(lhs_keys) // per_axis) // columns) + 1
        # squeeze=False always returns a 2-D array of axes, so [row, column] indexing
        # is valid even when the grid has a single row or a single column
        _, grid = plt.subplots(nrows=rows, ncols=columns, figsize=size, squeeze=False)

    xticks = []
    xtickslabels = []
    last = len(lhs_keys) - 1

    for ct, key in enumerate(lhs_keys):
        # position of this rule inside its axis
        slot = ct % per_axis

        if grid is None:
            ax = axis
        else:
            panel = ct // per_axis
            ax = grid[panel // columns, panel % columns]
            if slot == 0:
                # first rule of a new axis: draw the fuzzy sets and restart the tick lists
                draw_sets_on_axis(ax, model, size)
                xticks = []
                xtickslabels = []

        flrg = None
        if model.is_high_order:
            flrg = model.flrgs[key]
            x = slot * model.order + 1
            # one marker per lag of the left hand side
            for ct2, lhs in enumerate(flrg.LHS):
                ax.plot([x + ct2], [model.sets[lhs].centroid], 'o')
                xticks.append(x + ct2)
                xtickslabels.append(lhs)
            # line segments linking consecutive lags
            for ct2 in range(1, model.order):
                y = model.sets[flrg.LHS[ct2 - 1]].centroid
                dest = model.sets[flrg.LHS[ct2]].centroid
                ax.plot([x + ct2 - 1, x + ct2], [y, dest], '-')
            arrow_x = x + model.order - 1
            arrow_y = model.sets[flrg.LHS[-1]].centroid
        elif key in model.flrgs:
            flrg = model.flrgs[key]
            arrow_x = slot + 1
            arrow_y = model.sets[key].centroid
            ax.plot([arrow_x], [arrow_y], 'o')
            xticks.append(arrow_x)
            xtickslabels.append(key)

        if flrg is not None:
            # one arrow from the (last) left hand side set to each right hand side set
            for rhs in flrg.RHS:
                dest = model.sets[rhs].centroid
                ax.arrow(arrow_x + .1, arrow_y, 0.8, dest - arrow_y,
                         head_width=0.1, head_length=0.1, shape='full', overhang=0,
                         fc='k', ec='k')

        if slot == per_axis - 1 or ct == last:
            # the axis is complete: label every rule drawn on it, not only the last one
            ax.set_xticks(xticks)
            ax.set_xticklabels(xtickslabels)
            ax.set_xlim([0, per_axis * model.order + 1])

    plt.tight_layout()
    plt.show()


def draw_sets_on_axis(axis, model, size):
    """
    Draw the fuzzy sets of the model partitioner on an axis, with a horizontal guide line
    and a y tick on the centroid of each set

    :param axis: matplotlib axis; when None a new figure with the given size is created
    :param model: FTS model, read through model.partitioner and model.sets
    :param size: figure size, only used when axis is None
    """
    if axis is None:
        _, axis = plt.subplots(nrows=1, ncols=1, figsize=size)

    partitioner = model.partitioner
    names = partitioner.ordered_sets

    for name in names:
        fuzzy_set = model.sets[name]
        # The parameters are drawn against the x values [0, 1, 0]
        # NOTE(review): this presumably expects three-parameter (triangular) sets - confirm
        axis.plot([0, 1, 0], fuzzy_set.parameters, label=fuzzy_set.name)
        axis.axhline(fuzzy_set.centroid, c="lightgray", alpha=0.5)

    total = len(names)
    axis.set_xlim([0, total])
    axis.set_xticks(range(0, total))
    # Empty label for the tick at 0, then one set name per tick
    axis.set_xticklabels([''] + list(names))
    axis.set_ylim([partitioner.min, partitioner.max])

    centroids = [model.sets[name].centroid for name in names]
    axis.set_yticks(centroids)
    axis.set_yticklabels([str(round(centroid, 1)) + " - " + name
                          for centroid, name in zip(centroids, names)])


def current_milli_time():
    """
    Return the current value of time.time() converted to milliseconds and rounded to an int
    """
    return int(round(time.time() * 1000))


def uniquefilename(name):
    """
    Build a unique file name by inserting the current time in milliseconds before the extension

    Only the last extension of the file name is treated as the extension, so names with several
    dots ("model.v2.pkl") and dots in directory names ("out.d/model") keep all their parts.

    :param name: file name or path
    :return: name with the timestamp inserted before the extension, or appended when there is none
    """
    import os

    root, extension = os.path.splitext(name)
    return root + str(current_milli_time()) + extension


def show_and_save_image(fig, file, flag, lgd=None):
    """
    Show an image and, optionally, save it on file

    :param fig: Matplotlib Figure object
    :param file: filename to save the picture
    :param flag: if True the image will be saved and the figure closed afterwards
    :param lgd: legend; when given, the picture is saved with a tight bounding box that includes it
    """
    plt.show()
    if flag:
        if lgd is not None:
            # bbox_extra_artists is the savefig keyword that makes the tight bounding box
            # take the legend into account
            fig.savefig(file, bbox_extra_artists=(lgd,), bbox_inches='tight')
        else:
            fig.savefig(file)
        plt.close(fig)


def enumerate2(xs, start=0, step=1):
    """
    Enumerate an iterable with a configurable first counter value and increment

    :param xs: iterable to enumerate
    :param start: counter value paired with the first item
    :param step: amount added to the counter after each item
    :return: generator of (counter, item) tuples
    """
    counter = start
    for item in xs:
        yield counter, item
        counter += step


def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
    """
    Sliding window method of cross validation for time series

    :param data: the entire dataset (a sliceable sequence, or a pandas DataFrame sliced by row position)
    :param windowsize: window size
    :param train: percentual of the window size will be used for training the models
    :param inc: percentual of the window size used for slide the window; the window always slides
                by at least one sample
    :keyword progress: if True, the window positions are wrapped in a tqdm progress bar
    :return: generator of tuples (window start index, training set, test set)
    """

    multivariate = isinstance(data, pd.DataFrame)

    l = len(data.index) if multivariate else len(data)
    ttrain = int(round(windowsize * train, 0))
    # windowsize * inc may round to zero for small windows, and np.arange cannot work with a
    # zero step; slide by one sample in that case
    ic = max(1, int(round(windowsize * inc, 0)))

    rng = np.arange(0, l - windowsize + ic, ic)

    if kwargs.get('progress', None):
        from tqdm import tqdm
        rng = tqdm(rng)

    # DataFrames are sliced by row position, other sequences directly
    rows = data.iloc if multivariate else data

    for count in rng:
        split = count + ttrain
        # the last window is truncated at the end of the data
        _end = min(count + windowsize, l)
        yield (count, rows[count:split], rows[split:_end])


def persist_obj(obj, file):
    """
    Store an object on the filesystem with the Dill package

    Failures are not raised: any exception is caught and reported with a printed message.

    :param obj: object on memory
    :param file: file name to store the object
    """
    failure = "File {} could not be saved due exception {}"
    try:
        with open(file, 'wb') as handle:
            dill.dump(obj, handle)
    except Exception as error:
        print(failure.format(file, error))


def load_obj(file):
    """
    Read back an object stored on the filesystem with the Dill package

    :param file: file name where the object is stored
    :return: object
    """
    # NOTE(review): dill, like pickle, can run arbitrary code while loading; only open trusted files
    with open(file, 'rb') as handle:
        return dill.load(handle)


def persist_env(file):
    """
    Persist an entire environment on file. This function depends on Dill package

    The work is delegated to dill.dump_session; nothing is returned.

    :param file: file name to store the environment
    """
    dill.dump_session(file)


def load_env(file):
    """
    Load an environment from file. This function depends on Dill package

    The work is delegated to dill.load_session; nothing is returned.
    NOTE(review): presumably the counterpart of persist_env - as with any dill/pickle data,
    only load files from trusted sources.

    :param file: file name where the environment is stored
    """
    dill.load_session(file)