diff --git a/pyFTS/common/Transformations.py b/pyFTS/common/Transformations.py index fedb2d0..2f56e14 100644 --- a/pyFTS/common/Transformations.py +++ b/pyFTS/common/Transformations.py @@ -3,6 +3,7 @@ Common data transformation used on pre and post processing of the FTS """ import numpy as np +import pandas as pd import math from pyFTS import * @@ -45,6 +46,9 @@ class Transformation(object): class Differential(Transformation): """ Differentiation data transform + + y'(t) = y(t) - y(t-1) + y(t) = y(t-1) + y'(t) """ def __init__(self, lag): super(Differential, self).__init__() @@ -193,6 +197,9 @@ class AdaptiveExpectation(Transformation): class BoxCox(Transformation): """ Box-Cox power transformation + + y'(t) = log( y(t) ) + y(t) = exp( y'(t) ) """ def __init__(self, plambda): super(BoxCox, self).__init__() @@ -225,16 +232,106 @@ def Z(original): return z -# retrieved from Sadaei and Lee (2014) - Multilayer Stock ForecastingModel Using Fuzzy Time Series -def roi(original): - n = len(original) - roi = [] - for t in np.arange(0, n-1): - roi.append( (original[t+1] - original[t])/original[t] ) - return roi +class ROI(Transformation): + """ + Return of Investment (ROI) transformation. Retrieved from Sadaei and Lee (2014) - Multilayer Stock + Forecasting Model Using Fuzzy Time Series -def smoothing(original, lags): - pass + y'(t) = ( y(t) - y(t-1) ) / y(t-1) + y(t) = ( y(t-1) * y'(t) ) + y(t-1) + """ + def __init__(self, **kwargs): + super(ROI, self).__init__() + self.name = 'ROI' -def aggregate(original, operation): - pass + def apply(self, data, param=None, **kwargs): + modified = [(data[i] - data[i - 1]) / data[i - 1] for i in np.arange(1, len(data))] + modified.insert(0, .0) + return modified + + def inverse(self, data, param=None, **kwargs): + modified = [param[0]] + for i in np.arange(1, len(data)): + modified.append((modified[i - 1] * data[i]) + modified[i - 1]) + return modified + + +class LinearTrend(Transformation): + """ + Linear Trend. Estimate + + y'(t) = y(t) - (a*t+b) + y(t) = y'(t) + (a*t+b) + """ + def __init__(self, **kwargs): + super(LinearTrend, self).__init__() + self.name = 'LinearTrend' + self.index_type = kwargs.get('index_type','linear') + '''The type of the time index used to train the regression coefficients. Available types are: field, datetime''' + self.index_field = kwargs.get('index_field', None) + '''The Pandas Dataframe column to use as index''' + self.data_field = kwargs.get('data_field', None) + '''The Pandas Dataframe column to use as data''' + self.datetime_mask = kwargs.get('datetime_mask', None) + '''The Pandas Dataframe mask for datetime indexes ''' + + self.model = None + '''Regression model''' + + def train(self, data, **kwargs): + from pandas import datetime + from sklearn.linear_model import LinearRegression + + x = data[self.index_field].values + + if self.index_type == 'datetime': + x = pd.to_numeric(x, downcast='integer') + + indexes = np.reshape(x, (len(x), 1)) + values = data[self.data_field].values + self.model = LinearRegression() + self.model.fit(indexes, values) + + def trend(self, data): + x = data[self.index_field].values + if self.index_type == 'datetime': + x = pd.to_numeric(x, downcast='integer') + indexes = np.reshape(x, (len(x), 1)) + _trend = self.model.predict(indexes) + return _trend + + def apply(self, data, param=None, **kwargs): + values = data[self.data_field].values + _trend = self.trend(data) + modified = values - _trend + return modified + + def inverse(self, data, param=None, **kwargs): + x = self.generate_indexes(data, param[self.index_field].values[0], **kwargs) + indexes = np.reshape(x, (len(x), 1)) + _trend = self.model.predict(indexes) + modified = data + _trend + return modified + + def increment(self,value, **kwargs): + if self.index_type == 'linear': + return value + 1 + elif self.index_type == 'datetime': + if 'date_offset' not in kwargs: + raise Exception('A pandas.DateOffset must be passed in the parameter ''date_offset''') + doff = kwargs.get('date_offset') + return value + doff + + def generate_indexes(self, data, value, **kwargs): + if self.index_type == 'datetime': + ret = [self.increment(pd.to_datetime(value, format=self.datetime_mask), **kwargs)] + else: + ret = [self.increment(value, **kwargs)] + for i in np.arange(1,len(data)): + ret.append(self.increment(ret[-1], **kwargs)) + + if self.index_type == 'datetime': + ret = pd.Series(ret) + ret = pd.to_numeric(ret, downcast='integer') + + return np.array(ret)