diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index b8dae1a..1b308c8 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -294,19 +294,24 @@ def get_point_statistics(data, model, **kwargs): if indexer is not None: ndata = np.array(indexer.get_data(data)) else: - ndata = np.array(data[model.order:]) + ndata = np.array(data) ret = list() if steps_ahead == 1: - forecasts = model.predict(data, **kwargs) + forecasts = model.predict(ndata, **kwargs) + + if not isinstance(forecasts, (list, np.ndarray)): + forecasts = [forecasts] + if model.has_seasonality: nforecasts = np.array(forecasts) else: nforecasts = np.array(forecasts[:-1]) - ret.append(np.round(rmse(ndata, nforecasts), 2)) - ret.append(np.round(smape(ndata, nforecasts), 2)) - ret.append(np.round(UStatistic(ndata, nforecasts), 2)) + + ret.append(np.round(rmse(ndata[model.order:], nforecasts), 2)) + ret.append(np.round(smape(ndata[model.order:], nforecasts), 2)) + ret.append(np.round(UStatistic(ndata[model.order:], nforecasts), 2)) else: steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1) nforecasts = [] diff --git a/pyFTS/benchmarks/Util.py b/pyFTS/benchmarks/Util.py index 05aa31f..361e90c 100644 --- a/pyFTS/benchmarks/Util.py +++ b/pyFTS/benchmarks/Util.py @@ -47,14 +47,17 @@ def insert_benchmark(data, conn): def process_common_data(dataset, tag, type, job): model = job["obj"] - if not model.benchmark_only: + if model.benchmark_only: + data = [dataset, tag, type, model.shortname, + str(model.transformations[0]) if len(model.transformations) > 0 else None, + model.order, None, None, + None, job['steps'], job['method']] + else: data = [dataset, tag, type, model.shortname, str(model.partitioner.transformation) if model.partitioner.transformation is not None else None, model.order, model.partitioner.name, str(model.partitioner.partitions), len(model), job['steps'], job['method']] - else: - data = [tag, type, model.shortname, None, model.order, None, None, - None, job['steps'], job['method']] + return data diff --git a/pyFTS/benchmarks/arima.py b/pyFTS/benchmarks/arima.py index af89d76..b786eea 100644 --- a/pyFTS/benchmarks/arima.py +++ b/pyFTS/benchmarks/arima.py @@ -31,20 +31,27 @@ class ARIMA(fts.FTS): self.min_order = 1 self.alpha = kwargs.get("alpha", 0.05) self.shortname += str(self.alpha) + self._decompose_order(self.order) - def train(self, data, sets, order, parameters=None): - self.p = order[0] - self.d = order[1] - self.q = order[2] - self.order = self.p + self.q - self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha) + def _decompose_order(self, order): + if isinstance(order, (tuple, set, list)): + self.p = order[0] + self.d = order[1] + self.q = order[2] + self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0) + self.d = len(self.transformations) + self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha) + + def train(self, data, **kwargs): + if kwargs.get('order', None) is not None: + order = kwargs.get('order', (1,0,0)) + self._decompose_order(order) if self.indexer is not None: data = self.indexer.get_data(data) - data = self.apply_transformations(data, updateUoD=True) + #data = self.apply_transformations(data, updateUoD=True) - old_fit = self.model_fit try: self.model = stats_arima(data, order=(self.p, self.d, self.q)) self.model_fit = self.model.fit(disp=0) @@ -58,34 +65,32 @@ class ARIMA(fts.FTS): def ma(self, data): return data.dot(self.model_fit.maparams) - def forecast(self, data, **kwargs): + def forecast(self, ndata, **kwargs): if self.model_fit is None: return np.nan - if self.indexer is not None and isinstance(data, pd.DataFrame): - data = self.indexer.get_data(data) + if self.indexer is not None and isinstance(ndata, pd.DataFrame): + data = self.indexer.get_data(ndata) - ndata = np.array(self.apply_transformations(data)) + ndata = np.array(ndata) l = len(ndata) ret = [] - if self.d == 0: - ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags - else: - ar = np.array([ndata[k] + self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) + ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags if self.q > 0: - residuals = np.array([ndata[k] - ar[k - self.p] for k in np.arange(self.p, l)]) + residuals = ndata[self.p-1:] - ar - ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals)+1)]) + ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals) + 1)]) - ret = ar[self.q:] + ma + ret = ar[self.q - 1:] + ma + ret = ret[self.q:] else: ret = ar - ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]]) + #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]]) nforecasts = np.array(forecasts) return ret @@ -121,7 +126,7 @@ class ARIMA(fts.FTS): return ret - def forecast_ahead_interval(self, data, steps, **kwargs): + def forecast_ahead_interval(self, ndata, steps, **kwargs): if self.model_fit is None: return np.nan @@ -129,7 +134,7 @@ class ARIMA(fts.FTS): sigma = np.sqrt(self.model_fit.sigma2) - ndata = np.array(self.apply_transformations(data)) + #ndata = np.array(self.apply_transformations(data)) l = len(ndata) @@ -147,13 +152,10 @@ class ARIMA(fts.FTS): ret.append(tmp) - ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True) + #ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True) return ret - def empty_grid(self, resolution): - return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution) - def forecast_distribution(self, data, **kwargs): if self.indexer is not None and isinstance(data, pd.DataFrame): diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py index 02f39a3..562b377 100644 --- a/pyFTS/benchmarks/benchmarks.py +++ b/pyFTS/benchmarks/benchmarks.py @@ -151,7 +151,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): elif type == 'distribution': benchmark_methods = get_benchmark_probabilistic_methods() - if benchmark_models is not None: + if isinstance(benchmark_models, list) : pool.extend(benchmark_models) elif benchmark_methods is not None: for count, model in enumerate(benchmark_methods, start=0): @@ -342,6 +342,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg if mfts.benchmark_only: _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + mfts.append_transformation(partitioner.transformation) else: pttr = str(partitioner.__module__).split('.')[-1] _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions) @@ -356,6 +357,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg _end = time.time() times = _end - _start + _start = time.time() _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs) _end = time.time() diff --git a/pyFTS/common/FuzzySet.py b/pyFTS/common/FuzzySet.py index 087d6dd..ec7d44d 100644 --- a/pyFTS/common/FuzzySet.py +++ b/pyFTS/common/FuzzySet.py @@ -147,10 +147,18 @@ def fuzzyfy_series(data, fuzzySets, method='maximum'): return fts +def grant_bounds(data, sets, ordered_sets): + if data < sets[ordered_sets[0]].lower: + return sets[ordered_sets[0]].lower + elif data > sets[ordered_sets[-1]].upper: + return sets[ordered_sets[-1]].upper + else: + return data + def check_bounds(data, sets, ordered_sets): - if data < sets[ordered_sets[0]].get_lower(): + if data < sets[ordered_sets[0]].lower: return sets[ordered_sets[0]] - elif data > sets[ordered_sets[-1]].get_upper(): + elif data > sets[ordered_sets[-1]].upper: return sets[ordered_sets[-1]] diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py index bdc565d..16a4224 100644 --- a/pyFTS/models/hofts.py +++ b/pyFTS/models/hofts.py @@ -46,6 +46,7 @@ class HighOrderFTS(fts.FTS): self.order = kwargs.get('order',1) self.setsDict = {} self.is_high_order = True + self.min_order = 2 def generate_lhs_flrg(self, sample): lags = {} diff --git a/pyFTS/models/hwang.py b/pyFTS/models/hwang.py index a382598..dcb3eeb 100644 --- a/pyFTS/models/hwang.py +++ b/pyFTS/models/hwang.py @@ -20,7 +20,11 @@ class HighOrderFTS(fts.FTS): def forecast(self, ndata, **kwargs): - ordered_sets = FuzzySet.set_ordered(self.sets) + if self.sets == None: + self.sets = self.partitioner.sets + ordered_sets = self.partitioner.ordered_sets + else: + ordered_sets = FuzzySet.set_ordered(self.sets) l = len(self.sets) @@ -35,9 +39,9 @@ class HighOrderFTS(fts.FTS): for ix in range(l): s = ordered_sets[ix] - cn[ix] = self.sets[s].membership(ndata[t]) + cn[ix] = self.sets[s].membership( FuzzySet.grant_bounds(ndata[t], self.sets, ordered_sets)) for w in range(self.order - 1): - ow[w, ix] = self.sets[s].membership(ndata[t - w]) + ow[w, ix] = self.sets[s].membership(FuzzySet.grant_bounds(ndata[t - w], self.sets, ordered_sets)) rn[w, ix] = ow[w, ix] * cn[ix] ft[ix] = max(ft[ix], rn[w, ix]) mft = max(ft) @@ -55,4 +59,7 @@ class HighOrderFTS(fts.FTS): def train(self, data, **kwargs): if kwargs.get('sets', None) is not None: self.sets = kwargs.get('sets', None) - self.order = kwargs.get('order', 1) \ No newline at end of file + else: + self.sets = self.partitioner.sets + + self.order = kwargs.get('order', 2) \ No newline at end of file diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index 062e4bc..152c305 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -17,7 +17,7 @@ dataset = TAIEX.get_data() from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures -from pyFTS.models import pwfts +from pyFTS.models import pwfts, song ''' from pyFTS.partitioners import Grid, Util as pUtil partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff) @@ -32,12 +32,19 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead ''' #''' + +from pyFTS.benchmarks import arima, naive, quantreg + bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2, - #methods=[pwfts.ProbabilisticWeightedFTS], - benchmark_models=False, - #transformations=[tdiff], - orders=[1], #[1, 2, 3], - partitions=[20], #np.arange(10, 100, 5), + #methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS], + benchmark_models=True, + benchmark_methods=[naive.Naive, arima.ARIMA,arima.ARIMA], #arima.ARIMA,arima.ARIMA], + #benchmark_methods=[arima.ARIMA], + benchmark_methods_parameters=[1,(1,0,0),(1,0,1)], #(2,0,1),(2,0,2)], + #benchmark_methods_parameters=[(1,0,0)], + transformations=[None, tdiff], + orders=[1, 2, 3], + partitions=[35], #np.arange(10, 100, 5), progress=True, type='point', #steps_ahead=[1,4,7,10], #steps_ahead=[1] #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],