LinearTrend and ROI transformations
This commit is contained in:
parent
87a50c1342
commit
40bcd43230
@ -3,6 +3,7 @@ Common data transformation used on pre and post processing of the FTS
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import math
|
||||
from pyFTS import *
|
||||
|
||||
@ -45,6 +46,9 @@ class Transformation(object):
|
||||
class Differential(Transformation):
|
||||
"""
|
||||
Differentiation data transform
|
||||
|
||||
y'(t) = y(t) - y(t-1)
|
||||
y(t) = y(t-1) + y'(t)
|
||||
"""
|
||||
def __init__(self, lag):
|
||||
super(Differential, self).__init__()
|
||||
@ -193,6 +197,9 @@ class AdaptiveExpectation(Transformation):
|
||||
class BoxCox(Transformation):
|
||||
"""
|
||||
Box-Cox power transformation
|
||||
|
||||
y'(t) = log( y(t) )
|
||||
y(t) = exp( y'(t) )
|
||||
"""
|
||||
def __init__(self, plambda):
|
||||
super(BoxCox, self).__init__()
|
||||
@ -225,16 +232,106 @@ def Z(original):
|
||||
return z
|
||||
|
||||
|
||||
# retrieved from Sadaei and Lee (2014) - Multilayer Stock ForecastingModel Using Fuzzy Time Series
|
||||
def roi(original):
|
||||
n = len(original)
|
||||
roi = []
|
||||
for t in np.arange(0, n-1):
|
||||
roi.append( (original[t+1] - original[t])/original[t] )
|
||||
return roi
|
||||
class ROI(Transformation):
|
||||
"""
|
||||
Return of Investment (ROI) transformation. Retrieved from Sadaei and Lee (2014) - Multilayer Stock
|
||||
Forecasting Model Using Fuzzy Time Series
|
||||
|
||||
def smoothing(original, lags):
|
||||
pass
|
||||
y'(t) = ( y(t) - y(t-1) ) / y(t-1)
|
||||
y(t) = ( y(t-1) * y'(t) ) + y(t-1)
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
super(ROI, self).__init__()
|
||||
self.name = 'ROI'
|
||||
|
||||
def aggregate(original, operation):
|
||||
pass
|
||||
def apply(self, data, param=None, **kwargs):
|
||||
modified = [(data[i] - data[i - 1]) / data[i - 1] for i in np.arange(1, len(data))]
|
||||
modified.insert(0, .0)
|
||||
return modified
|
||||
|
||||
def inverse(self, data, param=None, **kwargs):
|
||||
modified = [param[0]]
|
||||
for i in np.arange(1, len(data)):
|
||||
modified.append((modified[i - 1] * data[i]) + modified[i - 1])
|
||||
return modified
|
||||
|
||||
|
||||
class LinearTrend(Transformation):
|
||||
"""
|
||||
Linear Trend. Estimate
|
||||
|
||||
y'(t) = y(t) - (a*t+b)
|
||||
y(t) = y'(t) + (a*t+b)
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
super(LinearTrend, self).__init__()
|
||||
self.name = 'LinearTrend'
|
||||
self.index_type = kwargs.get('index_type','linear')
|
||||
'''The type of the time index used to train the regression coefficients. Available types are: field, datetime'''
|
||||
self.index_field = kwargs.get('index_field', None)
|
||||
'''The Pandas Dataframe column to use as index'''
|
||||
self.data_field = kwargs.get('data_field', None)
|
||||
'''The Pandas Dataframe column to use as data'''
|
||||
self.datetime_mask = kwargs.get('datetime_mask', None)
|
||||
'''The Pandas Dataframe mask for datetime indexes '''
|
||||
|
||||
self.model = None
|
||||
'''Regression model'''
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
from pandas import datetime
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
x = data[self.index_field].values
|
||||
|
||||
if self.index_type == 'datetime':
|
||||
x = pd.to_numeric(x, downcast='integer')
|
||||
|
||||
indexes = np.reshape(x, (len(x), 1))
|
||||
values = data[self.data_field].values
|
||||
self.model = LinearRegression()
|
||||
self.model.fit(indexes, values)
|
||||
|
||||
def trend(self, data):
|
||||
x = data[self.index_field].values
|
||||
if self.index_type == 'datetime':
|
||||
x = pd.to_numeric(x, downcast='integer')
|
||||
indexes = np.reshape(x, (len(x), 1))
|
||||
_trend = self.model.predict(indexes)
|
||||
return _trend
|
||||
|
||||
def apply(self, data, param=None, **kwargs):
|
||||
values = data[self.data_field].values
|
||||
_trend = self.trend(data)
|
||||
modified = values - _trend
|
||||
return modified
|
||||
|
||||
def inverse(self, data, param=None, **kwargs):
|
||||
x = self.generate_indexes(data, param[self.index_field].values[0], **kwargs)
|
||||
indexes = np.reshape(x, (len(x), 1))
|
||||
_trend = self.model.predict(indexes)
|
||||
modified = data + _trend
|
||||
return modified
|
||||
|
||||
def increment(self,value, **kwargs):
|
||||
if self.index_type == 'linear':
|
||||
return value + 1
|
||||
elif self.index_type == 'datetime':
|
||||
if 'date_offset' not in kwargs:
|
||||
raise Exception('A pandas.DateOffset must be passed in the parameter ''date_offset''')
|
||||
doff = kwargs.get('date_offset')
|
||||
return value + doff
|
||||
|
||||
def generate_indexes(self, data, value, **kwargs):
|
||||
if self.index_type == 'datetime':
|
||||
ret = [self.increment(pd.to_datetime(value, format=self.datetime_mask), **kwargs)]
|
||||
else:
|
||||
ret = [self.increment(value, **kwargs)]
|
||||
for i in np.arange(1,len(data)):
|
||||
ret.append(self.increment(ret[-1], **kwargs))
|
||||
|
||||
if self.index_type == 'datetime':
|
||||
ret = pd.Series(ret)
|
||||
ret = pd.to_numeric(ret, downcast='integer')
|
||||
|
||||
return np.array(ret)
|
||||
|
Loading…
Reference in New Issue
Block a user