LinearTrend and ROI transformations

This commit is contained in:
Petrônio Cândido 2020-01-24 00:33:17 -03:00
parent 87a50c1342
commit 40bcd43230

View File

@ -3,6 +3,7 @@ Common data transformation used on pre and post processing of the FTS
""" """
import numpy as np import numpy as np
import pandas as pd
import math import math
from pyFTS import * from pyFTS import *
@ -45,6 +46,9 @@ class Transformation(object):
class Differential(Transformation): class Differential(Transformation):
""" """
Differentiation data transform Differentiation data transform
y'(t) = y(t) - y(t-1)
y(t) = y(t-1) + y'(t)
""" """
def __init__(self, lag): def __init__(self, lag):
super(Differential, self).__init__() super(Differential, self).__init__()
@ -193,6 +197,9 @@ class AdaptiveExpectation(Transformation):
class BoxCox(Transformation): class BoxCox(Transformation):
""" """
Box-Cox power transformation Box-Cox power transformation
y'(t) = log( y(t) )
y(t) = exp( y'(t) )
""" """
def __init__(self, plambda): def __init__(self, plambda):
super(BoxCox, self).__init__() super(BoxCox, self).__init__()
@ -225,16 +232,106 @@ def Z(original):
return z return z
# retrieved from Sadaei and Lee (2014) - Multilayer Stock ForecastingModel Using Fuzzy Time Series class ROI(Transformation):
def roi(original): """
n = len(original) Return of Investment (ROI) transformation. Retrieved from Sadaei and Lee (2014) - Multilayer Stock
roi = [] Forecasting Model Using Fuzzy Time Series
for t in np.arange(0, n-1):
roi.append( (original[t+1] - original[t])/original[t] )
return roi
def smoothing(original, lags): y'(t) = ( y(t) - y(t-1) ) / y(t-1)
pass y(t) = ( y(t-1) * y'(t) ) + y(t-1)
"""
def __init__(self, **kwargs):
super(ROI, self).__init__()
self.name = 'ROI'
def aggregate(original, operation): def apply(self, data, param=None, **kwargs):
pass modified = [(data[i] - data[i - 1]) / data[i - 1] for i in np.arange(1, len(data))]
modified.insert(0, .0)
return modified
def inverse(self, data, param=None, **kwargs):
modified = [param[0]]
for i in np.arange(1, len(data)):
modified.append((modified[i - 1] * data[i]) + modified[i - 1])
return modified
class LinearTrend(Transformation):
"""
Linear Trend. Estimate
y'(t) = y(t) - (a*t+b)
y(t) = y'(t) + (a*t+b)
"""
def __init__(self, **kwargs):
super(LinearTrend, self).__init__()
self.name = 'LinearTrend'
self.index_type = kwargs.get('index_type','linear')
'''The type of the time index used to train the regression coefficients. Available types are: field, datetime'''
self.index_field = kwargs.get('index_field', None)
'''The Pandas Dataframe column to use as index'''
self.data_field = kwargs.get('data_field', None)
'''The Pandas Dataframe column to use as data'''
self.datetime_mask = kwargs.get('datetime_mask', None)
'''The Pandas Dataframe mask for datetime indexes '''
self.model = None
'''Regression model'''
def train(self, data, **kwargs):
from pandas import datetime
from sklearn.linear_model import LinearRegression
x = data[self.index_field].values
if self.index_type == 'datetime':
x = pd.to_numeric(x, downcast='integer')
indexes = np.reshape(x, (len(x), 1))
values = data[self.data_field].values
self.model = LinearRegression()
self.model.fit(indexes, values)
def trend(self, data):
x = data[self.index_field].values
if self.index_type == 'datetime':
x = pd.to_numeric(x, downcast='integer')
indexes = np.reshape(x, (len(x), 1))
_trend = self.model.predict(indexes)
return _trend
def apply(self, data, param=None, **kwargs):
values = data[self.data_field].values
_trend = self.trend(data)
modified = values - _trend
return modified
def inverse(self, data, param=None, **kwargs):
x = self.generate_indexes(data, param[self.index_field].values[0], **kwargs)
indexes = np.reshape(x, (len(x), 1))
_trend = self.model.predict(indexes)
modified = data + _trend
return modified
def increment(self,value, **kwargs):
if self.index_type == 'linear':
return value + 1
elif self.index_type == 'datetime':
if 'date_offset' not in kwargs:
raise Exception('A pandas.DateOffset must be passed in the parameter ''date_offset''')
doff = kwargs.get('date_offset')
return value + doff
def generate_indexes(self, data, value, **kwargs):
if self.index_type == 'datetime':
ret = [self.increment(pd.to_datetime(value, format=self.datetime_mask), **kwargs)]
else:
ret = [self.increment(value, **kwargs)]
for i in np.arange(1,len(data)):
ret.append(self.increment(ret[-1], **kwargs))
if self.index_type == 'datetime':
ret = pd.Series(ret)
ret = pd.to_numeric(ret, downcast='integer')
return np.array(ret)