LinearTrend and ROI transformations

2020-01-24 00:33:17 -03:00 · 2020-01-24 00:33:17 -03:00 · 40bcd43230
commit 40bcd43230
parent 87a50c1342
1 changed files with 108 additions and 11 deletions
--- a/pyFTS/common/Transformations.py
+++ b/pyFTS/common/Transformations.py
@ -3,6 +3,7 @@ Common data transformation used on pre and post processing of the FTS
 """

 import numpy as np
+import pandas as pd
 import math
 from pyFTS import *

@ -45,6 +46,9 @@ class Transformation(object):
 class Differential(Transformation):
    """
    Differentiation data transform
+
+    y'(t) = y(t) - y(t-1)
+    y(t) =  y(t-1)  + y'(t)
    """
    def __init__(self, lag):
        super(Differential, self).__init__()
@ -193,6 +197,9 @@ class AdaptiveExpectation(Transformation):
 class BoxCox(Transformation):
    """
    Box-Cox power transformation
+
+    y'(t) = log( y(t) )
+    y(t) = exp( y'(t) )
    """
    def __init__(self, plambda):
        super(BoxCox, self).__init__()
@ -225,16 +232,106 @@ def Z(original):
    return z


-# retrieved from Sadaei and Lee (2014) - Multilayer Stock ForecastingModel Using Fuzzy Time Series
-def roi(original):
-    n = len(original)
-    roi = []
-    for t in np.arange(0, n-1):
-        roi.append( (original[t+1] - original[t])/original[t]  )
-    return roi
+class ROI(Transformation):
+    """
+    Return on Investment (ROI) transformation. Retrieved from Sadaei and Lee (2014) - Multilayer Stock
+    Forecasting Model Using Fuzzy Time Series

-def smoothing(original, lags):
-    pass
+    y'(t) = ( y(t) - y(t-1) ) / y(t-1)
+    y(t) = ( y(t-1) * y'(t) ) + y(t-1)
+    """
+    def __init__(self, **kwargs):
+        """
+        Run the base initialiser and set ``name`` to 'ROI'.
+
+        :param kwargs: accepted but not used by this initialiser
+        """
+        super(ROI, self).__init__()
+        self.name = 'ROI'

-def aggregate(original, operation):
-    pass
+    def apply(self, data, param=None, **kwargs):
+        """
+        Convert a series of values into period-over-period returns.
+
+        y'(t) = ( y(t) - y(t-1) ) / y(t-1), with y'(0) = 0 used as padding so that
+        the output has the same length as the input.
+
+        A zero at y(t-1) is not handled here and results in a division by zero.
+
+        :param data: indexable sequence of numeric values
+        :param param: not used
+        :param kwargs: not used
+        :return: list of returns with the same length as data
+        """
+        # An empty series has no first element to pad: return it empty rather than [0.0]
+        if len(data) == 0:
+            return []
+        # The first position has no predecessor, so its return is defined as zero
+        modified = [.0]
+        modified.extend((data[i] - data[i - 1]) / data[i - 1] for i in range(1, len(data)))
+        return modified
+
+    def inverse(self, data, param=None, **kwargs):
+        """
+        Rebuild the original series from its returns.
+
+        y(t) = ( y(t-1) * y'(t) ) + y(t-1), seeded with param[0] as y(0).
+        The first element of data is skipped; only data[1:] is applied.
+
+        :param data: sequence of returns
+        :param param: indexable whose first element is the starting value y(0)
+        :param kwargs: not used
+        :return: list starting with param[0] followed by one rebuilt value per element of data[1:]
+        """
+        restored = [param[0]]
+        for rate in data[1:]:
+            previous = restored[-1]
+            restored.append((previous * rate) + previous)
+        return restored
+
+
+class LinearTrend(Transformation):
+    """
+    Linear trend removal. Fits a linear regression of the data column on the index column
+    and subtracts the estimated trend from the data.
+
+    y'(t) = y(t) - (a*t+b)
+    y(t) =  y'(t) + (a*t+b)
+    """
+    def __init__(self, **kwargs):
+        super(LinearTrend, self).__init__()
+        self.name = 'LinearTrend'
+        self.index_type = kwargs.get('index_type','linear')
+        '''The type of the time index used to train the regression coefficients. The values handled by this class are: linear (default), datetime'''
+        self.index_field = kwargs.get('index_field', None)
+        '''The Pandas Dataframe column to use as index'''
+        self.data_field = kwargs.get('data_field', None)
+        '''The Pandas Dataframe column to use as data'''
+        self.datetime_mask = kwargs.get('datetime_mask', None)
+        '''The Pandas Dataframe mask for datetime indexes '''
+
+        self.model = None
+        '''Regression model'''
+
+    def _index_matrix(self, data):
+        """Return the index column of data as an (n, 1) array, converting datetime indexes to numbers"""
+        x = data[self.index_field].values
+        if self.index_type == 'datetime':
+            x = pd.to_numeric(x, downcast='integer')
+        return np.reshape(x, (len(x), 1))
+
+    def train(self, data, **kwargs):
+        """
+        Fit the linear regression of the data column on the index column.
+
+        :param data: object indexable by index_field and data_field whose columns expose ``.values``
+        :param kwargs: not used
+        """
+        # Local import: scikit-learn is only loaded when the transformation is trained
+        from sklearn.linear_model import LinearRegression
+
+        indexes = self._index_matrix(data)
+        values = data[self.data_field].values
+        self.model = LinearRegression()
+        self.model.fit(indexes, values)
+
+    def trend(self, data):
+        """
+        Predict the trend component for the index column of data using the fitted model.
+
+        :param data: object indexable by index_field whose column exposes ``.values``
+        :return: the model predictions, one per row of data
+        """
+        return self.model.predict(self._index_matrix(data))
+
+    def apply(self, data, param=None, **kwargs):
+        """
+        Remove the estimated trend from the data column.
+
+        :param data: object indexable by index_field and data_field
+        :param param: not used
+        :param kwargs: not used
+        :return: the data column values minus the predicted trend
+        """
+        values = data[self.data_field].values
+        return values - self.trend(data)
+
+    def inverse(self, data, param=None, **kwargs):
+        """
+        Add the estimated trend back to detrended values.
+
+        The indexes for the trend are generated starting one step after the first
+        index_field value found in param.
+
+        :param data: detrended values
+        :param param: object indexable by index_field; its first index value is the starting point. Required, despite the None default
+        :param kwargs: forwarded to generate_indexes (e.g. date_offset for datetime indexes)
+        :return: data plus the predicted trend
+        """
+        x = self.generate_indexes(data, param[self.index_field].values[0], **kwargs)
+        indexes = np.reshape(x, (len(x), 1))
+        _trend = self.model.predict(indexes)
+        return data + _trend
+
+    def increment(self,value, **kwargs):
+        """
+        Advance an index value by one step.
+
+        :param value: current index value
+        :param kwargs: must contain date_offset when index_type is 'datetime'
+        :return: value + 1 for 'linear' indexes, value + date_offset for 'datetime' indexes
+        """
+        if self.index_type == 'linear':
+            return value + 1
+        elif self.index_type == 'datetime':
+            if 'date_offset' not in kwargs:
+                raise Exception('A pandas.DateOffset must be passed in the parameter ''date_offset''')
+            doff = kwargs.get('date_offset')
+            return value + doff
+        # NOTE(review): any other index_type falls through and returns None
+
+    def generate_indexes(self, data, value, **kwargs):
+        """
+        Generate len(data) consecutive index values following value.
+
+        :param data: sequence whose length sets how many indexes are generated
+        :param value: last known index; the first generated index is one step after it
+        :param kwargs: forwarded to increment
+        :return: numpy array of indexes, converted to numbers when index_type is 'datetime'
+        """
+        if self.index_type == 'datetime':
+            ret = [self.increment(pd.to_datetime(value, format=self.datetime_mask), **kwargs)]
+        else:
+            ret = [self.increment(value, **kwargs)]
+        for _ in range(1, len(data)):
+            ret.append(self.increment(ret[-1], **kwargs))
+
+        if self.index_type == 'datetime':
+            ret = pd.Series(ret)
+            ret = pd.to_numeric(ret, downcast='integer')
+
+        return np.array(ret)