All modules for which code is available
- pyFTS.benchmarks.BSTS
- pyFTS.benchmarks.Measures
- pyFTS.benchmarks.ResidualAnalysis
- pyFTS.benchmarks.Tests
- pyFTS.benchmarks.Util
- pyFTS.benchmarks.arima
- pyFTS.benchmarks.benchmarks
- pyFTS.benchmarks.gaussianproc
- pyFTS.benchmarks.knn
- pyFTS.benchmarks.naive
- pyFTS.benchmarks.quantreg
- pyFTS.models.multivariate.granular
- pyFTS.models.multivariate.grid
- pyFTS.models.multivariate.mvfts
- pyFTS.models.multivariate.partitioner
- pyFTS.models.multivariate.variable
- pyFTS.models.multivariate.wmvfts
- pyFTS.models.nonstationary.common
- add_new_PWFLGR() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method)
- aggregate() (in module pyFTS.common.Transformations) - -
- alias (pyFTS.models.multivariate.variable.Variable attribute)
- AllMethodEnsembleFTS (class in pyFTS.models.ensemble.ensemble) -
- alpha (pyFTS.common.FuzzySet.FuzzySet attribute) - - -
- alpha_cut (pyFTS.common.fts.FTS attribute) - -
- analytic_tabular_dataframe() (in module pyFTS.benchmarks.Util)
- analytical_data_columns() (in module pyFTS.benchmarks.Util)
- append() (pyFTS.common.Composite.FuzzySet method)
-
+
- (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method) +
- (pyFTS.partitioners.Simple.SimplePartitioner method)
- (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution method)
- ar() (pyFTS.benchmarks.arima.ARIMA method)
- ARIMA (class in pyFTS.benchmarks.arima) - -
- around() (pyFTS.common.SortedCollection.SortedCollection method) - -
- auto_update (pyFTS.common.fts.FTS attribute) +
- around() (pyFTS.common.SortedCollection.SortedCollection method) +
- averageloglikelihood() (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution method)
- base_dataframe_columns() (in module pyFTS.benchmarks.Util) -
- batch_size (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - -
- bellmf() (in module pyFTS.common.Membership) - -
- benchmark_only (pyFTS.common.fts.FTS attribute)
- bestSplit() (in module pyFTS.partitioners.Entropy)
- between() (pyFTS.common.SortedCollection.SortedCollection method) - -
- bins (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution attribute)
- blip() (pyFTS.data.artificial.SignalEmulator method)
- BoxCox (class in pyFTS.common.Transformations) -
- BoxLjungStatistic() (in module pyFTS.benchmarks.Measures) +
- BoxLjungStatistic() (in module pyFTS.benchmarks.Tests) -
- BoxPierceStatistic() (in module pyFTS.benchmarks.Measures) +
- BoxPierceStatistic() (in module pyFTS.benchmarks.Tests)
- brier_score() (in module pyFTS.benchmarks.Measures)
- build() (pyFTS.models.multivariate.grid.GridCluster method)
-
+
- (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method) +
- (pyFTS.models.multivariate.variable.Variable method)
- (pyFTS.models.nonstationary.partitioners.PolynomialNonStationaryPartitioner method)
+- build_cdf_qtl() (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution method) -
- build_index() (pyFTS.models.seasonal.partitioner.TimeGridPartitioner method) +
- build_index() (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method) @@ -334,7 +314,7 @@
- cast_dataframe_to_synthetic_probabilistic() (in module pyFTS.benchmarks.Util) -
- centroid (pyFTS.common.FuzzySet.FuzzySet attribute) +
- change_target_variable() (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method)
- check_bounds() (in module pyFTS.common.FuzzySet) @@ -359,6 +339,8 @@
- chi_squared() (in module pyFTS.benchmarks.ResidualAnalysis)
- clear() (pyFTS.common.SortedCollection.SortedCollection method) + +
- clip_uod() (pyFTS.common.fts.FTS method)
- clone_parameters() (pyFTS.common.fts.FTS method) @@ -370,15 +352,19 @@
- CMeansPartitioner (class in pyFTS.partitioners.CMeans) +
- common_process_interval_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- common_process_point_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- common_process_probabilistic_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- --
-
- components (pyFTS.data.artificial.SignalEmulator attribute)
- conditional_perturbation_factors() (pyFTS.models.nonstationary.nsfts.NonStationaryFTS method) @@ -434,10 +420,6 @@
- flrg_lhs_unconditional_probability() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method)
- flrg_rhs_conditional_probability() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method) - -
- flrgs (pyFTS.common.fts.FTS attribute)
- FLRGTree (class in pyFTS.common.tree) @@ -561,6 +537,12 @@
- forecast() (pyFTS.benchmarks.arima.ARIMA method) -
- forecast_ahead() (pyFTS.common.fts.FTS method) +
- forecast_ahead() (pyFTS.benchmarks.gaussianproc.GPR method)
D
+- forecast_ahead_interval() (pyFTS.benchmarks.arima.ARIMA method)
-
+
- (pyFTS.benchmarks.BSTS.ARIMA method) + +
- (pyFTS.benchmarks.gaussianproc.GPR method) + +
- (pyFTS.benchmarks.knn.KNearestNeighbors method) +
- (pyFTS.benchmarks.quantreg.QuantileRegression method)
- (pyFTS.common.fts.FTS method)
- (pyFTS.models.ensemble.ensemble.EnsembleFTS method) + +
- (pyFTS.models.ifts.IntervalFTS method) + +
- (pyFTS.models.ifts.WeightedIntervalFTS method) + +
- (pyFTS.models.multivariate.mvfts.MVFTS method)
- (pyFTS.models.pwfts.ProbabilisticWeightedFTS method)
+
-- forecast_ahead_multivariate() (pyFTS.common.fts.FTS method)
-
- forecast_distribution() (pyFTS.benchmarks.arima.ARIMA method)
-
+
- (pyFTS.benchmarks.BSTS.ARIMA method) + +
- (pyFTS.benchmarks.gaussianproc.GPR method) +
- (pyFTS.benchmarks.knn.KNearestNeighbors method)
- (pyFTS.benchmarks.quantreg.QuantileRegression method) @@ -669,6 +677,12 @@
- forecast_interval() (pyFTS.benchmarks.arima.ARIMA method)
-
+
- (pyFTS.benchmarks.BSTS.ARIMA method) + +
- (pyFTS.benchmarks.gaussianproc.GPR method) + +
- (pyFTS.benchmarks.knn.KNearestNeighbors method) +
- (pyFTS.benchmarks.quantreg.QuantileRegression method)
- (pyFTS.common.fts.FTS method) @@ -676,6 +690,8 @@
- (pyFTS.models.ensemble.ensemble.EnsembleFTS method)
- (pyFTS.models.ifts.IntervalFTS method) + +
- (pyFTS.models.ifts.WeightedIntervalFTS method)
- (pyFTS.models.multivariate.mvfts.MVFTS method) @@ -695,25 +711,15 @@
- format_data() (pyFTS.models.multivariate.mvfts.MVFTS method) + + +
- format_experiment_table() (in module pyFTS.benchmarks.Tests)
- FTS (class in pyFTS.common.fts) -
- fts_method (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - - -
- fts_params (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - -
- fuzzify() (in module pyFTS.models.nonstationary.common)
- fuzzy() (pyFTS.common.fts.FTS method) @@ -728,6 +734,8 @@
- (pyFTS.models.multivariate.cmvfts.ClusteredMVFTS method)
- (pyFTS.models.multivariate.grid.IncrementalGridCluster method) + +
- (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method)
- (pyFTS.partitioners.partitioner.Partitioner method) @@ -938,10 +946,12 @@
- (in module pyFTS.data.sunspots)
- - forecast_distribution() (pyFTS.benchmarks.arima.ARIMA method)
+ - get_distribution_interquantile() (pyFTS.models.ensemble.ensemble.EnsembleFTS method)
- get_distribution_statistics() (in module pyFTS.benchmarks.Measures) @@ -965,6 +975,8 @@
- get_interval() (pyFTS.models.ensemble.ensemble.EnsembleFTS method) +
+- get_interval_ahead_statistics() (in module pyFTS.benchmarks.Measures)
- get_interval_methods() (in module pyFTS.benchmarks.benchmarks)
@@ -988,6 +1000,8 @@- (pyFTS.models.hofts.WeightedHighOrderFLRG method)
- (pyFTS.models.ifts.IntervalFTS method) +
+- (pyFTS.models.ifts.WeightedIntervalFTS method)
- (pyFTS.models.multivariate.flrg.FLRG method)
@@ -1079,15 +1093,25 @@- get_sequence_membership() (pyFTS.models.ifts.IntervalFTS method) + +
- get_UoD() (pyFTS.common.fts.FTS method) + +
- get_upper() (pyFTS.common.flrg.FLRG method)
- (pyFTS.models.hofts.WeightedHighOrderFLRG method)
- (pyFTS.models.ifts.IntervalFTS method) + +
- (pyFTS.models.ifts.WeightedIntervalFTS method)
- (pyFTS.models.multivariate.flrg.FLRG method) @@ -1107,6 +1131,8 @@
- getChildren() (pyFTS.common.tree.FLRGTreeNode method)
- getStr() (pyFTS.common.tree.FLRGTreeNode method) + +
- GPR (class in pyFTS.benchmarks.gaussianproc)
- grant_bounds() (in module pyFTS.common.FuzzySet) @@ -1122,26 +1148,10 @@
- parameters (pyFTS.common.FuzzySet.FuzzySet attribute) +
- parameters (pyFTS.common.Transformations.AdaptiveExpectation attribute)
- partition_function() (pyFTS.common.FuzzySet.FuzzySet method) @@ -1455,28 +1383,6 @@
- Partitioner (class in pyFTS.partitioners.partitioner) - -
- partitioner (pyFTS.common.fts.FTS attribute) - - -
- partitioner_method (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - - -
- partitioner_params (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - - -
- partitions (pyFTS.partitioners.partitioner.Partitioner attribute)
- paths() (pyFTS.common.tree.FLRGTreeNode method) @@ -1527,8 +1433,12 @@
- plot_density_rectange() (in module pyFTS.common.Util)
- plot_distribution() (in module pyFTS.common.Util) + +
- plot_distribution2() (in module pyFTS.common.Util)
- plot_interval() (in module pyFTS.common.Util) + +
- plot_interval2() (in module pyFTS.common.Util)
- plot_partitioners() (in module pyFTS.partitioners.Util) @@ -1555,8 +1465,6 @@
- plotResiduals() (in module pyFTS.benchmarks.ResidualAnalysis)
- PMF() (in module pyFTS.partitioners.Entropy) - -
- pmf_to_cdf() (in module pyFTS.benchmarks.Measures)
- point_dataframe_analytic_columns() (in module pyFTS.benchmarks.Util) @@ -1565,8 +1473,6 @@
- point_expected_value() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method)
- point_heuristic() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method) - -
- point_method (pyFTS.models.ensemble.ensemble.EnsembleFTS attribute)
- point_to_interval() (pyFTS.benchmarks.quantreg.QuantileRegression method) @@ -1576,9 +1482,9 @@
- PolynomialNonStationaryPartitioner (class in pyFTS.models.nonstationary.partitioners) -
- predict() (pyFTS.common.fts.FTS method) +
- post_hoc_tests() (in module pyFTS.benchmarks.Tests) -
- prefix (pyFTS.partitioners.partitioner.Partitioner attribute) +
- predict() (pyFTS.common.fts.FTS method)
- print_distribution_statistics() (in module pyFTS.benchmarks.benchmarks) @@ -1599,17 +1505,29 @@
- ProbabilityDistribution (class in pyFTS.probabilistic.ProbabilityDistribution)
- process_common_data() (in module pyFTS.benchmarks.Util) + +
- process_common_data2() (in module pyFTS.benchmarks.Util)
- process_interval_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- process_interval_jobs2() (in module pyFTS.benchmarks.benchmarks)
- process_point_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- process_point_jobs2() (in module pyFTS.benchmarks.benchmarks)
- process_probabilistic_jobs() (in module pyFTS.benchmarks.benchmarks) + +
- process_probabilistic_jobs2() (in module pyFTS.benchmarks.benchmarks)
- product_dict() (in module pyFTS.models.multivariate.mvfts)
- prune() (pyFTS.models.multivariate.grid.IncrementalGridCluster method) + +
- pseudologlikelihood() (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution method)
- pyFTS (module) @@ -1619,6 +1537,10 @@
- pyFTS.benchmarks.arima (module)
- pyFTS.benchmarks.benchmarks (module) + +
- pyFTS.benchmarks.BSTS (module) + +
- pyFTS.benchmarks.gaussianproc (module)
- pyFTS.benchmarks.knn (module) @@ -1629,6 +1551,8 @@
- pyFTS.benchmarks.quantreg (module)
- pyFTS.benchmarks.ResidualAnalysis (module) + +
- pyFTS.benchmarks.Tests (module)
- pyFTS.benchmarks.Util (module) @@ -1636,10 +1560,10 @@
- pyFTS.common.Composite (module) -
- pyFTS.common.flrg (module)
- pyFTS.common.fts (module) @@ -1753,6 +1677,8 @@
- pyFTS.models.multivariate.grid (module)
- pyFTS.models.multivariate.mvfts (module) + +
- pyFTS.models.multivariate.partitioner (module)
- pyFTS.models.multivariate.variable (module) @@ -1862,18 +1788,12 @@
- Retrainer (class in pyFTS.models.incremental.TimeVariant) -
- RHS (pyFTS.common.FLR.FLR attribute) - - -
- rmse() (in module pyFTS.benchmarks.Measures)
- rmse_interval() (in module pyFTS.benchmarks.Measures) @@ -1883,10 +1803,16 @@
- run() (pyFTS.data.artificial.SignalEmulator method)
- run_interval() (in module pyFTS.benchmarks.benchmarks) + +
- run_interval2() (in module pyFTS.benchmarks.benchmarks)
- run_point() (in module pyFTS.benchmarks.benchmarks) + +
- run_point2() (in module pyFTS.benchmarks.benchmarks)
- run_probabilistic() (in module pyFTS.benchmarks.benchmarks) + +
- run_probabilistic2() (in module pyFTS.benchmarks.benchmarks)
H
K
P
-
+ -+ - scale_up() (pyFTS.models.nonstationary.partitioners.PolynomialNonStationaryPartitioner method)
-- search() (pyFTS.models.seasonal.partitioner.TimeGridPartitioner method) +
- search() (pyFTS.models.multivariate.partitioner.MultivariatePartitioner method)
-- season (pyFTS.models.seasonal.partitioner.TimeGridPartitioner attribute) -
- SeasonalEnsembleFTS (class in pyFTS.models.ensemble.multiseasonal)
- SeasonalFLRG (class in pyFTS.models.seasonal.sfts) @@ -1960,21 +1886,19 @@
-
-
- setnames (pyFTS.partitioners.partitioner.Partitioner attribute) - -
- sets (pyFTS.common.fts.FTS attribute) -
- share_parameters() (in module pyFTS.distributed.spark)
- sharpness() (in module pyFTS.benchmarks.Measures) - -
- shortname (pyFTS.common.fts.FTS attribute)
- show_and_save_image() (in module pyFTS.common.Util)
- sigmf() (in module pyFTS.common.Membership)
- SignalEmulator (class in pyFTS.data.artificial) + +
- simple_synthetic_dataframe() (in module pyFTS.benchmarks.Util) + +
- SimpleEnsembleFTS (class in pyFTS.models.ensemble.ensemble)
- simplenonstationary_gridpartitioner_builder() (in module pyFTS.models.nonstationary.partitioners) @@ -2003,6 +1927,8 @@
- sliding_window() (in module pyFTS.common.Util)
- sliding_window_benchmarks() (in module pyFTS.benchmarks.benchmarks) + +
- sliding_window_benchmarks2() (in module pyFTS.benchmarks.benchmarks)
- smape() (in module pyFTS.benchmarks.Measures) @@ -2027,6 +1953,8 @@
- tabular_dataframe_columns() (in module pyFTS.benchmarks.Util) + +
- test_mean_equality() (in module pyFTS.benchmarks.Tests)
- TheilsInequality() (in module pyFTS.benchmarks.Measures) @@ -2037,6 +1965,10 @@
- train() (pyFTS.benchmarks.arima.ARIMA method)
-
+
- (pyFTS.benchmarks.BSTS.ARIMA method) + +
- (pyFTS.benchmarks.gaussianproc.GPR method) +
- (pyFTS.benchmarks.knn.KNearestNeighbors method)
- (pyFTS.benchmarks.quantreg.QuantileRegression method) @@ -2048,6 +1980,8 @@
- (pyFTS.models.ensemble.ensemble.AllMethodEnsembleFTS method)
- (pyFTS.models.ensemble.ensemble.EnsembleFTS method) + +
- (pyFTS.models.ensemble.ensemble.SimpleEnsembleFTS method)
- (pyFTS.models.ensemble.multiseasonal.SeasonalEnsembleFTS method) @@ -2101,16 +2035,6 @@
- Transformation (class in pyFTS.common.Transformations) - -
- transformation (pyFTS.models.multivariate.variable.Variable attribute) - - -
- transformations (pyFTS.common.fts.FTS attribute) - -
- transformations_param (pyFTS.common.fts.FTS attribute)
- trapmf() (in module pyFTS.common.Membership) @@ -2120,14 +2044,6 @@
- trimf() (in module pyFTS.common.Membership) -
- type (pyFTS.common.FuzzySet.FuzzySet attribute) - -
- unified_scaled_probabilistic() (in module pyFTS.benchmarks.Util)
- uniquefilename() (in module pyFTS.common.Util) - -
- unpack_args() (pyFTS.models.nonstationary.flrg.NonStationaryFLRG method)
@@ -2143,14 +2059,10 @@-
-
- uod (pyFTS.probabilistic.ProbabilityDistribution.ProbabilityDistribution attribute) - -
- uod_clip (pyFTS.common.fts.FTS attribute) +
- unpack_args() (pyFTS.models.nonstationary.flrg.NonStationaryFLRG method)
- update_model() (pyFTS.models.pwfts.ProbabilisticWeightedFTS method) @@ -2168,12 +2080,6 @@
- visualize_distributions() (in module pyFTS.models.pwfts) @@ -2195,6 +2101,8 @@
- WeightedHighOrderFLRG (class in pyFTS.models.hofts)
- WeightedHighOrderFTS (class in pyFTS.models.hofts) + +
- WeightedIntervalFTS (class in pyFTS.models.ifts)
- WeightedMVFTS (class in pyFTS.models.multivariate.wmvfts) @@ -2215,19 +2123,9 @@
- white_noise() (in module pyFTS.data.artificial) - -
- width (pyFTS.models.nonstationary.common.FuzzySet attribute) - -
- width_params (pyFTS.models.nonstationary.common.FuzzySet attribute)
- window_index() (in module pyFTS.models.nonstationary.common) -
- window_length (pyFTS.models.incremental.IncrementalEnsemble.IncrementalEnsembleFTS attribute) - -
- winkler_mean() (in module pyFTS.benchmarks.Measures)
- winkler_score() (in module pyFTS.benchmarks.Measures)
Z
- Z() (in module pyFTS.common.Transformations)
- pyFTS.benchmarks.benchmarks module
- pyFTS.benchmarks.Measures module
- pyFTS.benchmarks.ResidualAnalysis module +
- pyFTS.benchmarks.Tests module
- pyFTS.benchmarks.Util module
- pyFTS.benchmarks.arima module
- pyFTS.benchmarks.knn module
- pyFTS.benchmarks.naive module
- pyFTS.benchmarks.quantreg module +
- pyFTS.benchmarks.gaussianproc module +
- pyFTS.benchmarks.BSTS module
- pyFTS.common package
-
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index 36aadd1..0d04302 100644
Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ
diff --git a/docs/build/html/py-modindex.html b/docs/build/html/py-modindex.html
index b8a7489..faa3a9a 100644
--- a/docs/build/html/py-modindex.html
+++ b/docs/build/html/py-modindex.html
@@ -105,6 +105,16 @@
pyFTS.benchmarks.benchmarks
+ ++ + pyFTS.benchmarks.BSTS
+ + + + pyFTS.benchmarks.gaussianproc
+ @@ -130,6 +140,11 @@ pyFTS.benchmarks.ResidualAnalysis
+ + + pyFTS.benchmarks.Tests
+ @@ -435,6 +450,11 @@ pyFTS.models.multivariate.mvfts
+ + + pyFTS.models.multivariate.partitioner
+ diff --git a/docs/build/html/pyFTS.benchmarks.html b/docs/build/html/pyFTS.benchmarks.html index cf1d365..77afebb 100644 --- a/docs/build/html/pyFTS.benchmarks.html +++ b/docs/build/html/pyFTS.benchmarks.html @@ -68,11 +68,14 @@ - pyFTS.benchmarks.benchmarks module
- pyFTS.benchmarks.Measures module
- pyFTS.benchmarks.ResidualAnalysis module
+- pyFTS.benchmarks.Tests module
- pyFTS.benchmarks.Util module
- pyFTS.benchmarks.arima module
- pyFTS.benchmarks.knn module
- pyFTS.benchmarks.naive module
- pyFTS.benchmarks.quantreg module
+- pyFTS.benchmarks.gaussianproc module
+- pyFTS.benchmarks.BSTS module
Benchmarks methods for FTS methods
-
-
pyFTS.benchmarks.benchmarks.
SelecaoSimples_MenorRMSE
(original, parameters, modelo)[source]¶
+
pyFTS.benchmarks.benchmarks.
SelecaoSimples_MenorRMSE
(original, parameters, modelo)¶ +-
+
-
+
pyFTS.benchmarks.benchmarks.
common_process_interval_jobs
(conn, data, job)¶
+
-
+
-
+
pyFTS.benchmarks.benchmarks.
common_process_point_jobs
(conn, data, job)¶
+
-
+
-
+
pyFTS.benchmarks.benchmarks.
common_process_probabilistic_jobs
(conn, data, job)¶
-
-
pyFTS.benchmarks.benchmarks.
compareModelsPlot
(original, models_fo, models_ho)[source]¶
+
pyFTS.benchmarks.benchmarks.
compareModelsPlot
(original, models_fo, models_ho)¶-
-
pyFTS.benchmarks.benchmarks.
compareModelsTable
(original, models_fo, models_ho)[source]¶
+
pyFTS.benchmarks.benchmarks.
compareModelsTable
(original, models_fo, models_ho)¶-
-
pyFTS.benchmarks.benchmarks.
get_benchmark_interval_methods
()[source]¶
+ Return all non FTS methods for point_to_interval forecasting
pyFTS.benchmarks.benchmarks.
get_benchmark_interval_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_benchmark_point_methods
()[source]¶
+ Return all non FTS methods for point forecasting
pyFTS.benchmarks.benchmarks.
get_benchmark_point_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_benchmark_probabilistic_methods
()[source]¶
+ Return all FTS methods for probabilistic forecasting
pyFTS.benchmarks.benchmarks.
get_benchmark_probabilistic_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_interval_methods
()[source]¶
+ Return all FTS methods for point_to_interval forecasting
pyFTS.benchmarks.benchmarks.
get_interval_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_point_methods
()[source]¶
+ Return all FTS methods for point forecasting
pyFTS.benchmarks.benchmarks.
get_point_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_point_multivariate_methods
()[source]¶
+ Return all multivariate FTS methods for point forecasting
pyFTS.benchmarks.benchmarks.
get_point_multivariate_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
get_probabilistic_methods
()[source]¶
+ Return all FTS methods for probabilistic forecasting
pyFTS.benchmarks.benchmarks.
get_probabilistic_methods
()¶-
-
pyFTS.benchmarks.benchmarks.
pftsExploreOrderAndPartitions
(data, save=False, file=None)[source]¶
+
pyFTS.benchmarks.benchmarks.
pftsExploreOrderAndPartitions
(data, save=False, file=None)¶-
-
pyFTS.benchmarks.benchmarks.
plotCompared
(original, forecasts, labels, title)[source]¶
+
pyFTS.benchmarks.benchmarks.
plotCompared
(original, forecasts, labels, title)¶-
-
pyFTS.benchmarks.benchmarks.
plot_compared_series
(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], points=True, intervals=True, linewidth=1.5)[source]¶
+ Plot the forecasts of several one step ahead models, by point or by interval
@@ -220,12 +238,12 @@ -
-
pyFTS.benchmarks.benchmarks.
plot_point
(axis, points, order, label, color='red', ls='-', linewidth=1)[source]¶
+
pyFTS.benchmarks.benchmarks.
plot_point
(axis, points, order, label, color='red', ls='-', linewidth=1)¶-
-
pyFTS.benchmarks.benchmarks.
print_distribution_statistics
(original, models, steps, resolution)[source]¶
+ Run probabilistic benchmarks on given models and data and print the results
@@ -246,7 +264,7 @@ -
-
pyFTS.benchmarks.benchmarks.
print_interval_statistics
(original, models)[source]¶
+ Run interval benchmarks on given models and data and print the results
@@ -267,7 +285,7 @@ -
-
pyFTS.benchmarks.benchmarks.
print_point_statistics
(data, models, externalmodels=None, externalforecasts=None, indexers=None)[source]¶
+ Run point benchmarks on given models and data and print the results
@@ -291,7 +309,7 @@ -
-
pyFTS.benchmarks.benchmarks.
process_interval_jobs
(dataset, tag, job, conn)[source]¶
+ Extract information from a dictionary with interval benchmark results and save it on a database
@@ -312,9 +330,37 @@
pyFTS.benchmarks.benchmarks.
process_interval_jobs
(dataset, tag, job, conn)¶-
+
-
+
pyFTS.benchmarks.benchmarks.
process_interval_jobs2
(dataset, tag, job, conn)¶
+
-
-
pyFTS.benchmarks.benchmarks.
process_point_jobs
(dataset, tag, job, conn)[source]¶
+ Extract information from a dictionary with point benchmark results and save it on a database
++
++ + + +Parameters: +-
+
- dataset – the benchmark dataset name +
- tag – alias for the benchmark group being executed +
- job – a dictionary with the benchmark results +
- conn – a connection to a Sqlite database +
+ +Returns: + +
pyFTS.benchmarks.benchmarks.
process_point_jobs
(dataset, tag, job, conn)¶ +-
+
-
+
pyFTS.benchmarks.benchmarks.
process_point_jobs2
(dataset, tag, job, conn)¶ Extract information from a dictionary with point benchmark results and save it on a database
@@ -337,7 +383,30 @@ -
-
pyFTS.benchmarks.benchmarks.
process_probabilistic_jobs
(dataset, tag, job, conn)[source]¶
+ Extract information from a dictionary with probabilistic benchmark results and save it on a database
++
++ + + +Parameters: +-
+
- dataset – the benchmark dataset name +
- tag – alias for the benchmark group being executed +
- job – a dictionary with the benchmark results +
- conn – a connection to a Sqlite database +
+ +Returns: + +
pyFTS.benchmarks.benchmarks.
process_probabilistic_jobs
(dataset, tag, job, conn)¶ +-
+
-
+
pyFTS.benchmarks.benchmarks.
process_probabilistic_jobs2
(dataset, tag, job, conn)¶ Extract information from a dictionary with probabilistic benchmark results and save it on a database
@@ -360,7 +429,7 @@ -
-
pyFTS.benchmarks.benchmarks.
run_interval
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)[source]¶
+ Run the interval forecasting benchmarks
@@ -384,9 +453,14 @@
pyFTS.benchmarks.benchmarks.
run_interval
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)¶-
+
-
+
pyFTS.benchmarks.benchmarks.
run_interval2
(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs)¶
+
-
-
pyFTS.benchmarks.benchmarks.
run_point
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)[source]¶
+ Run the point forecasting benchmarks
@@ -410,9 +484,14 @@
pyFTS.benchmarks.benchmarks.
run_point
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)¶-
+
-
+
pyFTS.benchmarks.benchmarks.
run_point2
(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs)¶
+
-
-
pyFTS.benchmarks.benchmarks.
run_probabilistic
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)[source]¶
+ Run the probabilistic forecasting benchmarks
@@ -438,14 +517,19 @@
pyFTS.benchmarks.benchmarks.
run_probabilistic
(mfts, partitioner, train_data, test_data, window_key=None, **kwargs)¶-
+
-
+
pyFTS.benchmarks.benchmarks.
run_probabilistic2
(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs)¶
+
-
-
pyFTS.benchmarks.benchmarks.
simpleSearch_RMSE
(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None, partitioner=<class 'pyFTS.partitioners.Grid.GridPartitioner'>, transformation=None, indexer=None)[source]¶
+
pyFTS.benchmarks.benchmarks.
simpleSearch_RMSE
(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None, partitioner=<class 'pyFTS.partitioners.Grid.GridPartitioner'>, transformation=None, indexer=None)¶-
-
pyFTS.benchmarks.benchmarks.
sliding_window_benchmarks
(data, windowsize, train=0.8, **kwargs)[source]¶
+ Sliding window benchmarks for FTS forecasters.
For each data window, a train and test datasets will be splitted. For each train split, number of partitions and partitioning method will be created a partitioner model. And for each partitioner, order, @@ -492,55 +576,18 @@ informing the list of dispy nodes on ‘nodes’ parameter.
pyFTS.benchmarks.benchmarks.
sliding_window_benchmarks
(data, windowsize, train=0.8, **kwargs)¶-
-
-
+
-
+
pyFTS.benchmarks.benchmarks.
sliding_window_benchmarks2
(data, windowsize, train=0.8, **kwargs)¶
+
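A minimal sketch of how the sliding-window benchmark is usually driven. The TAIEX loader and the keyword names (methods, orders, partitions, file, dataset, tag) follow the pyFTS tutorial notebooks and should be treated as assumptions rather than a normative signature::

    from pyFTS.benchmarks import benchmarks as bchmk
    from pyFTS.models import chen, hofts
    from pyFTS.data import TAIEX

    data = TAIEX.get_data()

    # Compare two FTS methods over sliding windows of 1000 points, exploring
    # several orders and partition counts; results go to a sqlite database
    # that can later be analysed with pyFTS.benchmarks.Util and Tests.
    bchmk.sliding_window_benchmarks(
        data, 1000, train=0.8,
        methods=[chen.ConventionalFTS, hofts.HighOrderFTS],
        orders=[1, 2, 3],
        partitions=[10, 20, 30],
        file='benchmarks.db', dataset='TAIEX', tag='demo')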
pyFTS.benchmarks.Measures module¶
pyFTS module for common benchmark metrics
--
-
-
-
pyFTS.benchmarks.Measures.
BoxLjungStatistic
(data, h)[source]¶
- Q Statistic for Ljung–Box test
--
-- - - -Parameters: --
-
- data – -
- h – -
- -Returns: - -
-
-
-
-
pyFTS.benchmarks.Measures.
BoxPierceStatistic
(data, h)[source]¶
- Q Statistic for Box-Pierce test
--
-- - - -Parameters: --
-
- data – -
- h – -
- -Returns: - -
-
-
pyFTS.benchmarks.Measures.
TheilsInequality
(targets, forecasts)[source]¶
+ Theil’s Inequality Coefficient
@@ -561,7 +608,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
UStatistic
(targets, forecasts)[source]¶
+ Theil’s U Statistic
@@ -582,7 +629,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
acf
(data, k)[source]¶
+ Autocorrelation function estimative
@@ -603,19 +650,35 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
brier_score
(targets, densities)[source]¶
- Brier (1950). “Verification of Forecasts Expressed in Terms of Probability”. Monthly Weather Review. 78: 1–3.
+pyFTS.benchmarks.Measures.
brier_score
(targets, densities)¶ +Brier Score for probabilistic forecasts. +Brier (1950). “Verification of Forecasts Expressed in Terms of Probability”. Monthly Weather Review. 78: 1–3.
++
+ + + +Parameters: +-
+
- targets – a list with the target values +
- densities – a list with pyFTS.probabilistic.ProbabilityDistribution objects
+ +Returns: +float
+
-
-
pyFTS.benchmarks.Measures.
coverage
(targets, forecasts)[source]¶
+ Percent of target values that fall inside forecasted interval
pyFTS.benchmarks.Measures.
coverage
(targets, forecasts)¶-
-
pyFTS.benchmarks.Measures.
crps
(targets, densities)[source]¶
+ Continuous Ranked Probability Score
@@ -635,8 +698,8 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
-
-
pyFTS.benchmarks.Measures.
get_distribution_statistics
(data, model, **kwargs)[source]¶
+ -
+
pyFTS.benchmarks.Measures.
get_distribution_ahead_statistics
(data, distributions)¶ Get CRPS statistic and time for a forecasting model
@@ -657,8 +720,30 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
-
-
pyFTS.benchmarks.Measures.
get_interval_statistics
(data, model, **kwargs)[source]¶
+ -
+
pyFTS.benchmarks.Measures.
get_distribution_statistics
(data, model, **kwargs)¶
+ Get CRPS statistic and time for a forecasting model
++
++ + + +Parameters: +-
+
- data – test data +
- model – FTS model with probabilistic forecasting capability +
- kwargs – +
+ +Returns: +a list with the CRPS and execution time
+
-
+
-
+
pyFTS.benchmarks.Measures.
get_interval_ahead_statistics
(data, intervals, **kwargs)¶ Condensate all measures for point interval forecasters
@@ -671,17 +756,40 @@ informing the list of dispy nodes on ‘nodes’ parameter. - Returns: a list with the sharpness, resolution, coverage, .05 pinball mean,
+ + +Returns: +a list with the sharpness, resolution, coverage, .05 pinball mean, +.25 pinball mean, .75 pinball mean and .95 pinball mean.
+
-
+
-
+
pyFTS.benchmarks.Measures.
get_interval_statistics
(data, model, **kwargs)¶
+ Condensate all measures for point interval forecasters
++
-+ + + +Parameters: +-
+
- data – test data +
- model – FTS model with interval forecasting capability +
- kwargs – +
Returns: a list with the sharpness, resolution, coverage, .05 pinball mean, +.25 pinball mean, .75 pinball mean and .95 pinball mean.
.25 pinball mean, .75 pinball mean and .95 pinball mean.
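For interval forecasters the analogous helper is get_interval_statistics; a hedged sketch, assuming IntervalFTS and the partitioner keyword follow the usual pyFTS constructor style and using a synthetic series::

    import numpy as np
    from pyFTS.benchmarks import Measures
    from pyFTS.models import ifts
    from pyFTS.partitioners import Grid

    data = list(np.random.normal(10, 2, 1000))
    train, test = data[:800], data[800:]

    part = Grid.GridPartitioner(data=train, npart=20)
    model = ifts.IntervalFTS(partitioner=part)
    model.fit(train)

    # sharpness, resolution, coverage and the pinball means listed above
    interval_stats = Measures.get_interval_statistics(test, model)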
-
-
pyFTS.benchmarks.Measures.
get_point_statistics
(data, model, **kwargs)[source]¶
+ Condensate all measures for point forecasters
@@ -702,18 +810,30 @@ informing the list of dispy nodes on ‘nodes’ parameter. - - + - +
+pyFTS.benchmarks.Measures.
logarithm_score
(targets, densities)¶ Logarithm Score for probabilistic forecasts. Good IJ (1952). “Rational Decisions.” Journal of the Royal Statistical Society B, 14(1), 107–114. URL https://www.jstor.org/stable/2984087.
++
++ + + +Parameters: +-
+
- targets – a list with the target values +
- densities – a list with pyFTS.probabilistic.ProbabilityDistribution objects
+ +Returns: +float
+-
-
pyFTS.benchmarks.Measures.
mape
(targets, forecasts)[source]¶
+ Mean Average Percentual Error
@@ -734,12 +854,12 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
mape_interval
(targets, forecasts)[source]¶
+
pyFTS.benchmarks.Measures.
mape_interval
(targets, forecasts)¶-
-
pyFTS.benchmarks.Measures.
pinball
(tau, target, forecast)[source]¶
+ Pinball loss function. Measure the distance of forecast to the tau-quantile of the target
@@ -761,7 +881,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
pinball_mean
(tau, targets, forecasts)[source]¶
+ Mean pinball loss value of the forecast for a given tau-quantile of the targets
@@ -781,20 +901,15 @@ informing the list of dispy nodes on ‘nodes’ parameter.
pyFTS.benchmarks.Measures.
pinball_mean
(tau, targets, forecasts)¶-
-
pyFTS.benchmarks.Measures.
resolution
(forecasts)[source]¶
+ Resolution - Standard deviation of the intervals
pyFTS.benchmarks.Measures.
resolution
(forecasts)¶-
-
pyFTS.benchmarks.Measures.
rmse
(targets, forecasts)[source]¶
+ Root Mean Squared Error
@@ -815,7 +930,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
rmse_interval
(targets, forecasts)[source]¶
+ Root Mean Squared Error
@@ -836,13 +951,13 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
sharpness
(forecasts)[source]¶
+ Sharpness - Mean size of the intervals
pyFTS.benchmarks.Measures.
sharpness
(forecasts)¶-
-
pyFTS.benchmarks.Measures.
smape
(targets, forecasts, type=2)[source]¶
+ Symmetric Mean Average Percentual Error
@@ -864,7 +979,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
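The point metrics take plain sequences of target and forecasted values, so they can be checked directly; a small self-contained example::

    import numpy as np
    from pyFTS.benchmarks import Measures

    targets   = np.array([10.0, 12.0, 13.0, 12.5])
    forecasts = np.array([10.5, 11.5, 13.2, 12.0])

    print(Measures.rmse(targets, forecasts))    # Root Mean Squared Error
    print(Measures.mape(targets, forecasts))    # MAPE
    print(Measures.smape(targets, forecasts))   # symmetric MAPE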
-
pyFTS.benchmarks.Measures.
winkler_mean
(tau, targets, forecasts)[source]¶
+ Mean Winkler score value of the forecast for a given tau-quantile of the targets
@@ -886,13 +1001,29 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Measures.
winkler_score
(tau, target, forecast)[source]¶
+ - Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720.
+
+ + + +Parameters: +-
+
- tau – +
- target – +
- forecast – +
+ +Returns: + +
pyFTS.benchmarks.Measures.
winkler_score
(tau, target, forecast)¶Residual Analysis methods
-
-
pyFTS.benchmarks.ResidualAnalysis.
chi_squared
(q, h)[source]¶
+ Chi-Squared value
@@ -922,7 +1053,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.ResidualAnalysis.
compare_residuals
(data, models)[source]¶
+ Compare residual’s statistics of several models
@@ -943,7 +1074,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.ResidualAnalysis.
plotResiduals
(targets, models, tam=[8, 8], save=False, file=None)[source]¶
+ Plot residuals and statistics
@@ -967,72 +1098,171 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.ResidualAnalysis.
plot_residuals
(targets, models, tam=[8, 8], save=False, file=None)[source]¶
+
pyFTS.benchmarks.ResidualAnalysis.
plot_residuals
(targets, models, tam=[8, 8], save=False, file=None)¶-
-
pyFTS.benchmarks.ResidualAnalysis.
residuals
(targets, forecasts, order=1)[source]¶
+ First order residuals
pyFTS.benchmarks.ResidualAnalysis.
residuals
(targets, forecasts, order=1)¶-
-
pyFTS.benchmarks.ResidualAnalysis.
single_plot_residuals
(targets, forecasts, order, tam=[8, 8], save=False, file=None)[source]¶
+
pyFTS.benchmarks.ResidualAnalysis.
single_plot_residuals
(targets, forecasts, order, tam=[8, 8], save=False, file=None)¶+pyFTS.benchmarks.Tests module¶
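A hedged sketch of the residual-analysis helpers on two fitted point forecasters; the constructor keywords and the synthetic series are assumptions, the function names are the ones documented above::

    import numpy as np
    from pyFTS.benchmarks import ResidualAnalysis as ra
    from pyFTS.models import chen, hofts
    from pyFTS.partitioners import Grid

    data = list(np.random.normal(10, 2, 1000))
    train, test = data[:800], data[800:]

    part = Grid.GridPartitioner(data=train, npart=10)
    model_a = chen.ConventionalFTS(partitioner=part)
    model_b = hofts.HighOrderFTS(partitioner=part, order=2)
    model_a.fit(train); model_b.fit(train)

    res = ra.residuals(test, model_a.predict(test), order=model_a.order)  # first-order residuals
    ra.compare_residuals(test, [model_a, model_b])                        # residual statistics table
    ra.plot_residuals(test, [model_a, model_b], tam=[8, 8])               # residual plots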
+-
+
-
+
pyFTS.benchmarks.Tests.
BoxLjungStatistic
(data, h)¶
+ Q Statistic for Ljung–Box test
++
++ + + +Parameters: +-
+
- data – +
- h – +
+ +Returns: + +
-
+
-
+
pyFTS.benchmarks.Tests.
BoxPierceStatistic
(data, h)¶
+ Q Statistic for Box-Pierce test
++
++ + + +Parameters: +-
+
- data – +
- h – +
+ +Returns: + +
-
+
-
+
pyFTS.benchmarks.Tests.
format_experiment_table
(df, exclude=[], replace={}, csv=True, std=False)¶
+
-
+
-
+
pyFTS.benchmarks.Tests.
post_hoc_tests
(post_hoc, control_method, alpha=0.05, method='finner')¶
+ Finner paired post-hoc test with NSFTS as control method.
+$H_0$: There is no significant difference between the means
+$H_1$: There is a significant difference between the means
++
++ + + +Parameters: +-
+
- post_hoc – +
- control_method – +
- alpha – +
- method – +
+ +Returns: + +
-
+
-
+
pyFTS.benchmarks.Tests.
test_mean_equality
(tests, alpha=0.05, method='friedman')¶
+ Test for the equality of the means, with alpha confidence level.
+H_0: There’s no significant difference between the means +H_1: There is at least one significant difference between the means
++
++ + + +Parameters: +-
+
- tests – +
- alpha – +
- method – +
+ +Returns: + +
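A hedged sketch of the equality and post-hoc tests; the input shape (one column per model, one row per experiment window, e.g. RMSE values pulled from the benchmark database) is an assumption, as are the illustrative numbers::

    import pandas as pd
    from pyFTS.benchmarks import Tests

    measures = pd.DataFrame({
        'CHEN':  [4.2, 4.5, 4.1, 4.8],
        'HOFTS': [3.9, 4.0, 3.8, 4.2],
        'PWFTS': [3.7, 3.9, 3.6, 4.0],
    })

    # Friedman test for the equality of the means across methods
    Tests.test_mean_equality(measures, alpha=0.05, method='friedman')

    # Finner post-hoc test against a chosen control method
    Tests.post_hoc_tests(measures, control_method='PWFTS', alpha=0.05, method='finner')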
pyFTS.benchmarks.Util module¶
Facilities for pyFTS Benchmark module
-
-
pyFTS.benchmarks.Util.
analytic_tabular_dataframe
(dataframe)[source]¶
+
pyFTS.benchmarks.Util.
analytic_tabular_dataframe
(dataframe)¶-
-
pyFTS.benchmarks.Util.
analytical_data_columns
(experiments)[source]¶
+
pyFTS.benchmarks.Util.
analytical_data_columns
(experiments)¶-
-
pyFTS.benchmarks.Util.
base_dataframe_columns
()[source]¶
+
pyFTS.benchmarks.Util.
base_dataframe_columns
()¶-
-
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic
(infile, outfile, experiments, type)[source]¶
+
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic
(infile, outfile, experiments, type)¶-
-
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_interval
(df, data_columns)[source]¶
+
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_interval
(df, data_columns)¶-
-
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_point
(df, data_columns)[source]¶
+
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_point
(df, data_columns)¶-
-
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_probabilistic
(df, data_columns)[source]¶
+
pyFTS.benchmarks.Util.
cast_dataframe_to_synthetic_probabilistic
(df, data_columns)¶-
-
pyFTS.benchmarks.Util.
check_ignore_list
(b, ignore)[source]¶
+
pyFTS.benchmarks.Util.
check_ignore_list
(b, ignore)¶-
-
pyFTS.benchmarks.Util.
check_replace_list
(m, replace)[source]¶
+
pyFTS.benchmarks.Util.
check_replace_list
(m, replace)¶-
-
pyFTS.benchmarks.Util.
create_benchmark_tables
(conn)[source]¶
+ Create a sqlite3 table designed to store benchmark results.
@@ -1046,17 +1276,17 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Util.
extract_measure
(dataframe, measure, data_columns)[source]¶
+
pyFTS.benchmarks.Util.
extract_measure
(dataframe, measure, data_columns)¶-
-
pyFTS.benchmarks.Util.
find_best
(dataframe, criteria, ascending)[source]¶
+
pyFTS.benchmarks.Util.
find_best
(dataframe, criteria, ascending)¶-
-
pyFTS.benchmarks.Util.
get_dataframe_from_bd
(file, filter)[source]¶
+ Query the sqlite benchmark database and return a pandas dataframe with the results
@@ -1077,7 +1307,7 @@ informing the list of dispy nodes on ‘nodes’ parameter. -
-
pyFTS.benchmarks.Util.
insert_benchmark
(data, conn)[source]¶
+ Insert benchmark data on database
@@ -1115,17 +1345,17 @@ Value: the measure value -
-
pyFTS.benchmarks.Util.
interval_dataframe_analytic_columns
(experiments)[source]¶
+
pyFTS.benchmarks.Util.
interval_dataframe_analytic_columns
(experiments)¶-
-
pyFTS.benchmarks.Util.
interval_dataframe_synthetic_columns
()[source]¶
+
pyFTS.benchmarks.Util.
interval_dataframe_synthetic_columns
()¶-
-
pyFTS.benchmarks.Util.
open_benchmark_db
(name)[source]¶
+ Open a connection with a Sqlite database designed to store benchmark results.
@@ -1141,47 +1371,70 @@ Value: the measure value -
-
pyFTS.benchmarks.Util.
plot_dataframe_interval
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
plot_dataframe_interval
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
plot_dataframe_interval_pinball
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
plot_dataframe_interval_pinball
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
plot_dataframe_point
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
plot_dataframe_point
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
plot_dataframe_probabilistic
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'], sort_ascend=[True, True, True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
plot_dataframe_probabilistic
(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'], sort_ascend=[True, True, True, True], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
point_dataframe_analytic_columns
(experiments)[source]¶
+
pyFTS.benchmarks.Util.
point_dataframe_analytic_columns
(experiments)¶-
-
pyFTS.benchmarks.Util.
point_dataframe_synthetic_columns
()[source]¶
+
pyFTS.benchmarks.Util.
point_dataframe_synthetic_columns
()¶-
-
pyFTS.benchmarks.Util.
probabilistic_dataframe_analytic_columns
(experiments)[source]¶
+
pyFTS.benchmarks.Util.
probabilistic_dataframe_analytic_columns
(experiments)¶-
-
pyFTS.benchmarks.Util.
probabilistic_dataframe_synthetic_columns
()[source]¶
+
pyFTS.benchmarks.Util.
probabilistic_dataframe_synthetic_columns
()¶-
-
pyFTS.benchmarks.Util.
process_common_data
(dataset, tag, type, job)[source]¶
+ Wraps benchmark information on a tuple for sqlite database
++
++ + + +Parameters: +-
+
- dataset – benchmark dataset +
- tag – benchmark set alias +
- type – forecasting type +
- job – a dictionary with benchmark data +
+ +Returns: +tuple for sqlite database
+
pyFTS.benchmarks.Util.
process_common_data
(dataset, tag, type, job)¶ +-
+
-
+
pyFTS.benchmarks.Util.
process_common_data2
(dataset, tag, type, job)¶ Wraps benchmark information on a tuple for sqlite database
@@ -1204,12 +1457,12 @@ Value: the measure value -
-
pyFTS.benchmarks.Util.
save_dataframe_interval
(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times, q05, q25, q75, q95, steps, method)[source]¶
+
pyFTS.benchmarks.Util.
save_dataframe_interval
(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times, q05, q25, q75, q95, steps, method)¶-
-
pyFTS.benchmarks.Util.
save_dataframe_point
(experiments, file, objs, rmse, save, synthetic, smape, times, u, steps, method)[source]¶
+ Create a dataframe to store the benchmark results
@@ -1237,7 +1490,7 @@ Value: the measure value -
-
pyFTS.benchmarks.Util.
save_dataframe_probabilistic
(experiments, file, objs, crps, times, save, synthetic, steps, method)[source]¶
+ Save benchmark results for m-step ahead probabilistic forecasters :param experiments: :param file: @@ -1253,42 +1506,65 @@ Value: the measure value
+-
+
-
+
pyFTS.benchmarks.Util.
simple_synthetic_dataframe
(file, tag, measure, sql=None)¶
+ Read experiment results from the sqlite3 database in ‘file’, synthesize the results of the metric ‘measure’ sharing the same ‘tag’, and return a Pandas DataFrame with the mean results.
++
++ + + +Parameters: +-
+
- file – sqlite3 database file name +
- tag – common tag of the experiments +
- measure – metric to synthetize +
+ +Returns: +Pandas DataFrame with the mean results
+
-
-
pyFTS.benchmarks.Util.
tabular_dataframe_columns
()[source]¶
+
pyFTS.benchmarks.Util.
tabular_dataframe_columns
()¶-
-
pyFTS.benchmarks.Util.
unified_scaled_interval
(experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
unified_scaled_interval
(experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
unified_scaled_interval_pinball
(experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
unified_scaled_interval_pinball
(experiments, tam, save=False, file=None, sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'], sort_ascend=[True, False, True, True], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
unified_scaled_point
(experiments, tam, save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
unified_scaled_point
(experiments, tam, save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1], save_best=False, ignore=None, replace=None)¶-
-
pyFTS.benchmarks.Util.
unified_scaled_probabilistic
(experiments, tam, save=False, file=None, sort_columns=['CRPSAVG', 'CRPSSTD'], sort_ascend=[True, True], save_best=False, ignore=None, replace=None)[source]¶
+
pyFTS.benchmarks.Util.
unified_scaled_probabilistic
(experiments, tam, save=False, file=None, sort_columns=['CRPSAVG', 'CRPSSTD'], sort_ascend=[True, True], save_best=False, ignore=None, replace=None)¶pyFTS.benchmarks.arima module¶
-
-class
pyFTS.benchmarks.arima.
ARIMA
(**kwargs)[source]¶
+class Bases:
pyFTS.common.fts.FTS
Façade for statsmodels.tsa.arima_model
-
-
forecast
(ndata, **kwargs)[source]¶
+ Point forecast one step ahead
@@ -1327,7 +1603,7 @@ Value: the measure value -
-
forecast_ahead_distribution
(data, steps, **kwargs)[source]¶
+ Probabilistic forecast n steps ahead
@@ -1336,7 +1612,7 @@ Value: the measure value @@ -1349,7 +1625,7 @@ Value: the measure valueParameters: - data – time series data with the minimal length equal to the max_lag of the model
- steps – the number of steps ahead to forecast -
- kwargs – model specific parameters +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0)
-
-
forecast_ahead_interval
(ndata, steps, **kwargs)[source]¶
+ Interval forecast n steps ahead
@@ -1358,7 +1634,7 @@ Value: the measure value @@ -1371,7 +1647,7 @@ Value: the measure valueParameters: - data – time series data with the minimal length equal to the max_lag of the model
- steps – the number of steps ahead to forecast -
- kwargs – model specific parameters +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0)
-
-
forecast_distribution
(data, **kwargs)[source]¶
+ Probabilistic forecast one step ahead
@@ -1392,7 +1668,7 @@ Value: the measure value -
-
forecast_interval
(data, **kwargs)[source]¶
+ Interval forecast one step ahead
@@ -1413,12 +1689,12 @@ Value: the measure value -
-
train
(data, **kwargs)[source]¶
+ Method specific parameter fitting
@@ -1441,12 +1717,99 @@ Value: the measure value pyFTS.benchmarks.knn module¶
-
-class
pyFTS.benchmarks.knn.
KNearestNeighbors
(**kwargs)[source]¶
+class Bases:
-pyFTS.common.fts.FTS
K-Nearest Neighbors
+A façade for sklearn.neighbors
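A hedged usage sketch; the order keyword (number of lags in the query vector) is an assumption based on the common pyFTS constructor style, while the forecast methods are the ones documented below::

    import numpy as np
    from pyFTS.benchmarks import knn

    data = list(np.random.normal(10, 2, 1000))
    train, test = data[:800], data[800:]

    model = knn.KNearestNeighbors(order=2)
    model.fit(train)

    point     = model.forecast(test)                                 # one step ahead
    intervals = model.forecast_interval(test)                        # prediction intervals
    ahead     = model.forecast_ahead(test[:model.order], steps=10)   # multi-step forecast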
+-
+
-
+
forecast
(data, **kwargs)¶
+ Point forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the forecasted values
+
-
+
-
+
forecast_ahead
(data, steps, **kwargs)¶
+ Point forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast (default: 1) +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted values
+
-
+
-
+
forecast_ahead_distribution
(data, steps, **kwargs)¶
+ Probabilistic forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted Probability Distributions
+
-
+
-
+
forecast_ahead_interval
(data, steps, **kwargs)¶
+ Interval forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted intervals
+
-
-
forecast_distribution
(data, **kwargs)[source]¶
+ Probabilistic forecast one step ahead
@@ -1465,14 +1828,35 @@ Value: the measure value
forecast_distribution
(data, **kwargs)¶-
+
-
+
forecast_interval
(data, **kwargs)¶
+ Interval forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the prediction intervals
+
-
-
train
(data, **kwargs)[source]¶
+ Method specific parameter fitting
@@ -1495,12 +1879,12 @@ Value: the measure value pyFTS.benchmarks.naive module¶
-
-class
pyFTS.benchmarks.naive.
Naive
(**kwargs)[source]¶
+class Bases:
pyFTS.common.fts.FTS
Naïve Forecasting method
-
-
forecast
(data, **kwargs)[source]¶
+ Point forecast one step ahead
@@ -1526,12 +1910,12 @@ Value: the measure value pyFTS.benchmarks.quantreg module¶
-
-class
pyFTS.benchmarks.quantreg.
QuantileRegression
(**kwargs)[source]¶
+class Bases:
pyFTS.common.fts.FTS
Façade for statsmodels.regression.quantile_regression
-
-
forecast
(ndata, **kwargs)[source]¶
+ Point forecast one step ahead
@@ -1552,7 +1936,7 @@ Value: the measure value -
-
forecast_ahead_distribution
(ndata, steps, **kwargs)[source]¶
+ Probabilistic forecast n steps ahead
@@ -1561,7 +1945,7 @@ Value: the measure value @@ -1574,7 +1958,7 @@ Value: the measure valueParameters: - data – time series data with the minimal length equal to the max_lag of the model
- steps – the number of steps ahead to forecast -
- kwargs – model specific parameters +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0)
-
-
forecast_ahead_interval
(ndata, steps, **kwargs)[source]¶
+ Interval forecast n steps ahead
@@ -1583,7 +1967,7 @@ Value: the measure value @@ -1596,7 +1980,7 @@ Value: the measure valueParameters: - data – time series data with the minimal length equal to the max_lag of the model
- steps – the number of steps ahead to forecast -
- kwargs – model specific parameters +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0)
-
-
forecast_distribution
(ndata, **kwargs)[source]¶
+ Probabilistic forecast one step ahead
@@ -1617,7 +2001,7 @@ Value: the measure value -
-
forecast_interval
(ndata, **kwargs)[source]¶
+ Interval forecast one step ahead
@@ -1638,22 +2022,319 @@ Value: the measure value -
-
interval_to_interval
(data, lo_params, up_params)[source]¶
+
interval_to_interval
(data, lo_params, up_params)¶-
-
point_to_interval
(data, lo_params, up_params)[source]¶
+
point_to_interval
(data, lo_params, up_params)¶-
-
train
(data, **kwargs)[source]¶
+ Method specific parameter fitting
++
++ + + + +Parameters: +-
+
- data – training time series data +
- kwargs – Method specific parameters +
train
(data, **kwargs)¶ +++pyFTS.benchmarks.gaussianproc module¶
+-
+
-
+class
pyFTS.benchmarks.gaussianproc.
GPR
(**kwargs)¶
+ Bases:
+pyFTS.common.fts.FTS
Façade for sklearn.gaussian_process
+-
+
-
+
forecast
(data, **kwargs)¶
+ Point forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the forecasted values
+
-
+
-
+
forecast_ahead
(data, steps, **kwargs)¶
+ Point forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast (default: 1) +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted values
+
-
+
-
+
forecast_ahead_distribution
(data, steps, **kwargs)¶
+ Probabilistic forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted Probability Distributions
+
-
+
-
+
forecast_ahead_interval
(data, steps, **kwargs)¶
+ Interval forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted intervals
+
-
+
-
+
forecast_distribution
(data, **kwargs)¶
+ Probabilistic forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with probabilistic.ProbabilityDistribution objects representing the forecasted Probability Distributions
+
-
+
-
+
forecast_interval
(data, **kwargs)¶
+ Interval forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the prediction intervals
+
-
+
-
+
train
(data, **kwargs)¶
+ Method specific parameter fitting
++
++ + + + +Parameters: +-
+
- data – training time series data +
- kwargs – Method specific parameters +
-
+
+pyFTS.benchmarks.BSTS module¶
+-
+
-
+class
pyFTS.benchmarks.BSTS.
ARIMA
(**kwargs)¶
+ Bases:
+pyFTS.common.fts.FTS
Façade for statsmodels.tsa.arima_model
+-
+
-
+
forecast
(ndata, **kwargs)¶
+ Point forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the forecasted values
+
-
+
-
+
forecast_ahead_distribution
(data, steps, **kwargs)¶
+ Probabilistic forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted Probability Distributions
+
-
+
-
+
forecast_ahead_interval
(ndata, steps, **kwargs)¶
+ Interval forecast n steps ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- steps – the number of steps ahead to forecast +
- start_at – in the multi step forecasting, the index of the data where to start forecasting (default: 0) +
+ +Returns: +a list with the forecasted intervals
+
-
+
-
+
forecast_distribution
(data, **kwargs)¶
+ Probabilistic forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with probabilistic.ProbabilityDistribution objects representing the forecasted Probability Distributions
+
-
+
-
+
forecast_interval
(data, **kwargs)¶
+ Interval forecast one step ahead
++
++ + + +Parameters: +-
+
- data – time series data with the minimal length equal to the max_lag of the model +
- kwargs – model specific parameters +
+ +Returns: +a list with the prediction intervals
+
-
+
-
+
inference
(steps)¶
+
-
+
-
+
train
(data, **kwargs)¶ Method specific parameter fitting
diff --git a/docs/build/html/pyFTS.common.html b/docs/build/html/pyFTS.common.html index bc34a74..ea00ab8 100644 --- a/docs/build/html/pyFTS.common.html +++ b/docs/build/html/pyFTS.common.html @@ -125,12 +125,12 @@ Composite Fuzzy Sets
-
-class
pyFTS.common.Composite.
FuzzySet
(name, superset=False, **kwargs)[source]¶
+class Bases:
pyFTS.common.FuzzySet.FuzzySet
Composite Fuzzy Set
-
-
append
(mf, parameters)[source]¶
+ Adds a new function to composition
@@ -151,7 +151,7 @@ -
-
append_set
(set)[source]¶
+ Adds a new function to composition
@@ -172,7 +172,7 @@ -
-
membership
(x)[source]¶
+ Calculate the membership value of a given input
@@ -188,7 +188,7 @@ -
-
transform
(x)[source]¶
+ Preprocess the data point for non native types
pyFTS.common.FLR module
This module implements functions for Fuzzy Logical Relationship generation

class pyFTS.common.FLR.FLR(LHS, RHS)
Bases: object
Fuzzy Logical Relationship
Represents a temporal transition of the fuzzy set LHS at time t to the fuzzy set RHS at time t+1.
LHS: Left Hand Side fuzzy set
RHS: Right Hand Side fuzzy set

class pyFTS.common.FLR.IndexedFLR(index, LHS, RHS)
Bases: pyFTS.common.FLR.FLR
Season Indexed Fuzzy Logical Relationship
index: seasonal index

pyFTS.common.FLR.generate_high_order_recurrent_flr(fuzzyData)
Create an ordered FLR set from a list of fuzzy sets with recurrence

pyFTS.common.FLR.generate_indexed_flrs(sets, indexer, data, transformation=None, alpha_cut=0.0)
Create a season-indexed ordered FLR set from a list of fuzzy sets with recurrence

pyFTS.common.FLR.generate_non_recurrent_flrs(fuzzyData)
Create an ordered FLR set from a list of fuzzy sets without recurrence

pyFTS.common.FLR.generate_recurrent_flrs(fuzzyData)
Create an ordered FLR set from a list of fuzzy sets with recurrence
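A sketch of FLR generation from a fuzzified toy series; the Grid partitioner parameters and the fuzzyfy keywords (method, mode) are illustrative assumptions based on the fuzzyfy entry below.

from pyFTS.partitioners import Grid
from pyFTS.common import FuzzySet, FLR

data = [10, 12, 13, 12, 15, 16, 18, 17, 19, 21, 20, 22]   # toy series

part = Grid.GridPartitioner(data=data, npart=5)            # fuzzy sets over the universe of discourse
fuzzified = FuzzySet.fuzzyfy(data, part, method='maximum', mode='sets')
flrs = FLR.generate_recurrent_flrs(fuzzified)              # temporal transitions A_i -> A_j
for flr in flrs[:3]:
    print(flr)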
pyFTS.common.FuzzySet module

class pyFTS.common.FuzzySet.FuzzySet(name, mf, parameters, centroid, alpha=1.0, **kwargs)
Bases: object
Fuzzy Set
Z: Partition function with respect to the membership function
alpha: The alpha cut value
centroid: The fuzzy set center of mass (or midpoint)
mf: The membership function
name: The fuzzy set name
parameters: The parameters of the membership function
type: The fuzzy set type (common, composite, nonstationary, etc.)
variable: In multivariate time series, indicates to which variable this fuzzy set belongs

membership(x)
Calculate the membership value of a given input

partition_function(uod=None, nbins=100)
Calculate the partition function over the membership function

transform(x)
Preprocess the data point for non native types

pyFTS.common.FuzzySet.check_bounds(data, fuzzy_sets, ordered_sets)

pyFTS.common.FuzzySet.check_bounds_index(data, fuzzy_sets, ordered_sets)

pyFTS.common.FuzzySet.fuzzyfy(data, partitioner, **kwargs)
A general method for fuzzyfication

pyFTS.common.FuzzySet.fuzzyfy_instance(inst, fuzzy_sets, ordered_sets=None)
Calculate the membership values for a data point given fuzzy sets

pyFTS.common.FuzzySet.fuzzyfy_instances(data, fuzzy_sets, ordered_sets=None)
Calculate the membership values for a list of data points given fuzzy sets

pyFTS.common.FuzzySet.fuzzyfy_series(data, fuzzy_sets, method='maximum', alpha_cut=0.0, ordered_sets=None)

pyFTS.common.FuzzySet.fuzzyfy_series_old(data, fuzzy_sets, method='maximum')

pyFTS.common.FuzzySet.get_fuzzysets(inst, fuzzy_sets, ordered_sets=None, alpha_cut=0.0)
Return the fuzzy sets whose membership value for inst is greater than the alpha_cut

pyFTS.common.FuzzySet.get_maximum_membership_fuzzyset(inst, fuzzy_sets, ordered_sets=None)
Fuzzify a data point, returning the fuzzy set with maximum membership value

pyFTS.common.FuzzySet.get_maximum_membership_fuzzyset_index(inst, fuzzy_sets)
Fuzzify a data point, returning the index of the fuzzy set with maximum membership value

pyFTS.common.FuzzySet.grant_bounds(data, fuzzy_sets, ordered_sets)

pyFTS.common.FuzzySet.set_ordered(fuzzy_sets)
Order a fuzzy set list by their centroids
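A brief sketch of building sets directly and fuzzifying a single value, assuming the dict-of-sets convention used by the pyFTS partitioners (all numbers are arbitrary):

from pyFTS.common import FuzzySet, Membership

A1 = FuzzySet.FuzzySet("A1", Membership.trimf, [0, 5, 10], 5)     # triangular set centred at 5
A2 = FuzzySet.FuzzySet("A2", Membership.trimf, [5, 10, 15], 10)   # triangular set centred at 10
sets = {"A1": A1, "A2": A2}

print(FuzzySet.fuzzyfy_instance(7, sets))                      # membership of 7 in each set, e.g. [0.6, 0.4]
print(FuzzySet.get_maximum_membership_fuzzyset(7, sets).name)  # set with the highest membership, here "A1"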
pyFTS.common.Membership module
Membership functions for Fuzzy Sets

pyFTS.common.Membership.bellmf(x, parameters)
Bell shaped membership function

pyFTS.common.Membership.gaussmf(x, parameters)
Gaussian fuzzy membership function

pyFTS.common.Membership.sigmf(x, parameters)
Sigmoid / Logistic membership function

pyFTS.common.Membership.singleton(x, parameters)
Singleton membership function, a single value fuzzy function

pyFTS.common.Membership.trapmf(x, parameters)
Trapezoidal fuzzy membership function

pyFTS.common.Membership.trimf(x, parameters)
Triangular fuzzy membership function
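For example, the triangular function peaks at the middle vertex and decays linearly to zero at the outer vertices; a quick check with arbitrary parameter values:

from pyFTS.common import Membership

print(Membership.trimf(5, [0, 5, 10]))      # 1.0 at the peak
print(Membership.trimf(2.5, [0, 5, 10]))    # 0.5 halfway up the left slope
print(Membership.trapmf(6, [0, 2, 8, 10]))  # 1.0 on the trapezoid plateau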
pyFTS.common.SortedCollection module

class pyFTS.common.SortedCollection.SortedCollection(iterable=(), key=None)
Bases: object
Sequence sorted by a key function.
SortedCollection() is much easier to work with than using bisect() directly, with a simpler API and support for key functions.

find_ge(k)
Return the first item with a key >= k. Raise ValueError if not found

insert_right(item)
Insert a new item. If equal keys are found, add to the right
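A minimal sketch of the collection in use (the key function is illustrative):

from pyFTS.common.SortedCollection import SortedCollection

sc = SortedCollection(key=lambda item: item[0])   # keep tuples ordered by their first field
for point in [(3, 'c'), (1, 'a'), (2, 'b')]:
    sc.insert_right(point)

print(sc.find_ge(2))   # first item whose key is >= 2, i.e. (2, 'b')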
pyFTS.common.Transformations module
Common data transformations used on pre and post processing of the FTS

class pyFTS.common.Transformations.AdaptiveExpectation(parameters)
Bases: pyFTS.common.Transformations.Transformation
Adaptive Expectation post processing
apply(data, param=None, **kwargs)
Apply the transformation on input data
inverse(data, param, **kwargs)
Apply the inverse transformation

class pyFTS.common.Transformations.BoxCox(plambda)
Bases: pyFTS.common.Transformations.Transformation
Box-Cox power transformation
apply(data, param=None, **kwargs)
Apply the transformation on input data
inverse(data, param=None, **kwargs)
Apply the inverse transformation

class pyFTS.common.Transformations.Differential(lag)
Bases: pyFTS.common.Transformations.Transformation
Differentiation data transform
apply(data, param=None, **kwargs)
Apply the transformation on input data
inverse(data, param, **kwargs)
Apply the inverse transformation

class pyFTS.common.Transformations.Scale(min=0, max=1)
Bases: pyFTS.common.Transformations.Transformation
Scale data inside an interval [min, max]
apply(data, param=None, **kwargs)
inverse(data, param, **kwargs)
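A short sketch of applying and inverting a transform on a toy series (the lambda value and the data are arbitrary):

from pyFTS.common import Transformations

data = [10, 12, 13, 12, 15, 16]

boxcox = Transformations.BoxCox(0.5)   # power transform with lambda = 0.5
tdata = boxcox.apply(data)             # transformed series
restored = boxcox.inverse(tdata)       # back-transform; should closely match the original data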
pyFTS.benchmarks removed index entries (signatures only):

forecast_interval(ndata, **kwargs)
forecast_distribution(ndata, **kwargs)
forecast_ahead_interval(ndata, steps, **kwargs)
forecast_ahead_distribution(ndata, steps, **kwargs)
forecast(ndata, **kwargs)
class pyFTS.benchmarks.quantreg.QuantileRegression(**kwargs)
forecast(data, **kwargs)
class pyFTS.benchmarks.naive.Naive(**kwargs)
train(data, **kwargs)
class pyFTS.benchmarks.knn.KNearestNeighbors(**kwargs)
train(data, **kwargs)
forecast_interval(data, **kwargs)
forecast_distribution(data, **kwargs)
forecast_ahead_interval(ndata, steps, **kwargs)
forecast_ahead_distribution(data, steps, **kwargs)
forecast(ndata, **kwargs)
class pyFTS.benchmarks.arima.ARIMA(**kwargs)
pyFTS.benchmarks.Util.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, synthetic, steps, method)
pyFTS.benchmarks.Util.save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape, times, u, steps, method)
pyFTS.benchmarks.Util.open_benchmark_db(name)
pyFTS.benchmarks.Util.insert_benchmark(data, conn)
pyFTS.benchmarks.Util.get_dataframe_from_bd(file, filter)
pyFTS.benchmarks.Util.create_benchmark_tables(conn)
pyFTS.benchmarks.ResidualAnalysis.plotResiduals(targets, models, tam=[8, 8], save=False, file=None)
pyFTS.benchmarks.ResidualAnalysis.compare_residuals(data, models)
pyFTS.benchmarks.ResidualAnalysis.chi_squared(q, h)
pyFTS.benchmarks.Measures.winkler_mean(tau, targets, forecasts)
pyFTS.benchmarks.Measures.smape(targets, forecasts, type=2)
pyFTS.benchmarks.Measures.rmse_interval(targets, forecasts)
pyFTS.benchmarks.Measures.rmse(targets, forecasts)
pyFTS.benchmarks.Measures.pinball(tau, target, forecast)
pyFTS.benchmarks.Measures.mape(targets, forecasts)
pyFTS.benchmarks.Measures.get_point_statistics(data, model, **kwargs)
pyFTS.benchmarks.Measures.crps(targets, densities)
pyFTS.benchmarks.Measures.acf(data, k)
pyFTS.benchmarks.Measures.UStatistic(targets, forecasts)
pyFTS.benchmarks.Measures.TheilsInequality(targets, forecasts)
pyFTS.benchmarks.benchmarks.print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None)
pyFTS.benchmarks.benchmarks.print_interval_statistics(original, models)
pyFTS.benchmarks.benchmarks.print_distribution_statistics(original, models, steps, resolution)
pyFTS.benchmarks.benchmarks.plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5], points=True, intervals=True, linewidth=1.5)
Source code for pyFTS.benchmarks.Measures
-# -*- coding: utf8 -*-
-
-"""
-pyFTS module for common benchmark metrics
-"""
-
-import time
-import numpy as np
-import pandas as pd
-from pyFTS.common import FuzzySet, SortedCollection
-from pyFTS.probabilistic import ProbabilityDistribution
-
-
-[docs]def acf(data, k):
- """
- Autocorrelation function estimate
-
- :param data:
- :param k:
- :return:
- """
- mu = np.nanmean(data)
- sigma = np.var(data)
- n = len(data)
- s = 0
- for t in np.arange(0, n - k):
- s += (data[t] - mu) * (data[t + k] - mu)
-
- return 1 / ((n - k) * sigma) * s
-
-
-[docs]def rmse(targets, forecasts):
- """
- Root Mean Squared Error
-
- :param targets:
- :param forecasts:
- :return:
- """
- if isinstance(targets, list):
- targets = np.array(targets)
- if isinstance(forecasts, list):
- forecasts = np.array(forecasts)
- return np.sqrt(np.nanmean((targets - forecasts) ** 2))
-
-
-[docs]def rmse_interval(targets, forecasts):
- """
- Root Mean Squared Error for interval forecasts (computed on the interval means)
-
- :param targets:
- :param forecasts:
- :return:
- """
- fmean = [np.mean(i) for i in forecasts]
- return np.sqrt(np.nanmean((fmean - targets) ** 2))
-
-
-[docs]def mape(targets, forecasts):
- """
- Mean Absolute Percentage Error (MAPE)
-
- :param targets:
- :param forecasts:
- :return:
- """
- if isinstance(targets, list):
- targets = np.array(targets)
- if isinstance(forecasts, list):
- forecasts = np.array(forecasts)
- return np.nanmean(np.abs(np.divide(np.subtract(targets, forecasts), targets))) * 100
-
-
-[docs]def smape(targets, forecasts, type=2):
- """
- Symmetric Mean Absolute Percentage Error (SMAPE)
-
- :param targets:
- :param forecasts:
- :param type:
- :return:
- """
- if isinstance(targets, list):
- targets = np.array(targets)
- if isinstance(forecasts, list):
- forecasts = np.array(forecasts)
- if type == 1:
- return np.nanmean(np.abs(forecasts - targets) / ((forecasts + targets) / 2))
- elif type == 2:
- return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
- else:
- return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
-
-
-[docs]def mape_interval(targets, forecasts):
- fmean = [np.mean(i) for i in forecasts]
- return np.mean(abs(fmean - targets) / fmean) * 100
-
-
-[docs]def UStatistic(targets, forecasts):
- """
- Theil's U Statistic
-
- :param targets:
- :param forecasts:
- :return:
- """
- l = len(targets)
- if isinstance(targets, list):
- targets = np.array(targets)
- if isinstance(forecasts, list):
- forecasts = np.array(forecasts)
-
- naive = []
- y = []
- for k in np.arange(0, l - 1):
- y.append(np.subtract(forecasts[k], targets[k]) ** 2)
- naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
- return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
-
-
-[docs]def TheilsInequality(targets, forecasts):
- """
- Theil’s Inequality Coefficient
-
- :param targets:
- :param forecasts:
- :return:
- """
- res = targets - forecasts
- t = len(res)
- us = np.sqrt(sum([u ** 2 for u in res]))
- ys = np.sqrt(sum([y ** 2 for y in targets]))
- fs = np.sqrt(sum([f ** 2 for f in forecasts]))
- return us / (ys + fs)
-
-
-[docs]def BoxPierceStatistic(data, h):
- """
- Q Statistic for Box-Pierce test
-
- :param data:
- :param h:
- :return:
- """
- n = len(data)
- s = 0
- for k in np.arange(1, h + 1):
- r = acf(data, k)
- s += r ** 2
- return n * s
-
-
-[docs]def BoxLjungStatistic(data, h):
- """
- Q Statistic for Ljung–Box test
-
- :param data:
- :param h:
- :return:
- """
- n = len(data)
- s = 0
- for k in np.arange(1, h + 1):
- r = acf(data, k)
- s += r ** 2 / (n - k)
- return n * (n - 2) * s
-
-
-[docs]def sharpness(forecasts):
- """Sharpness - Mean size of the intervals"""
- tmp = [i[1] - i[0] for i in forecasts]
- return np.mean(tmp)
-
-
-[docs]def resolution(forecasts):
- """Resolution - Standard deviation of the intervals"""
- shp = sharpness(forecasts)
- tmp = [abs((i[1] - i[0]) - shp) for i in forecasts]
- return np.mean(tmp)
-
-
-[docs]def coverage(targets, forecasts):
- """Percent of target values that fall inside forecasted interval"""
- preds = []
- for i in np.arange(0, len(forecasts)):
- if targets[i] >= forecasts[i][0] and targets[i] <= forecasts[i][1]:
- preds.append(1)
- else:
- preds.append(0)
- return np.nanmean(preds)
-
-
-[docs]def pinball(tau, target, forecast):
- """
- Pinball loss function. Measure the distance of forecast to the tau-quantile of the target
-
- :param tau: quantile value in the range (0,1)
- :param target:
- :param forecast:
- :return: float, distance of forecast to the tau-quantile of the target
- """
- if target >= forecast:
- return np.subtract(target, forecast) * tau
- else:
- return np.subtract(forecast, target) * (1 - tau)
-
-
-[docs]def pinball_mean(tau, targets, forecasts):
- """
- Mean pinball loss value of the forecast for a given tau-quantile of the targets
-
- :param tau: quantile value in the range (0,1)
- :param targets: list of target values
- :param forecasts: list of prediction intervals
- :return: float, the pinball loss mean for tau quantile
- """
- if tau <= 0.5:
- preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
- else:
- preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
- return np.nanmean(preds)
-
-
-[docs]def winkler_score(tau, target, forecast):
- '''R. L. Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720. '''
- delta = forecast[1] - forecast[0]
- if forecast[0] < target and target < forecast[1]:
- return delta
- elif forecast[0] > target:
- return delta + 2 * (forecast[0] - target) / tau
- elif forecast[1] < target:
- return delta + 2 * (target - forecast[1]) / tau
-
-
-[docs]def winkler_mean(tau, targets, forecasts):
- """
- Mean Winkler score value of the forecast for a given tau-quantile of the targets
-
- :param tau: quantile value in the range (0,1)
- :param targets: list of target values
- :param forecasts: list of prediction intervals
- :return: float, the Winkler score mean for tau quantile
- """
- preds = [winkler_score(tau, targets[i], forecasts[i]) for i in np.arange(0, len(forecasts))]
-
- return np.nanmean(preds)
-
-
-[docs]def brier_score(targets, densities):
- '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3. '''
- ret = []
- for ct, d in enumerate(densities):
- try:
- v = d.bin_index.find_ge(targets[ct])
-
- score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
- score += (d.distribution[v] - 1) ** 2
- ret.append(score)
- except ValueError as ex:
- ret.append(sum([d.distribution[k] ** 2 for k in d.bins]))
- return sum(ret) / len(ret)
-
-
-[docs]def pmf_to_cdf(density):
- ret = []
- for row in density.index:
- tmp = []
- prev = 0
- for col in density.columns:
- prev += density[col][row] if not np.isnan(density[col][row]) else 0
- tmp.append(prev)
- ret.append(tmp)
- df = pd.DataFrame(ret, columns=density.columns)
- return df
-
-
-
-
-
-[docs]def heavyside_cdf(bins, targets):
- ret = []
- for t in targets:
- result = [1 if b >= t else 0 for b in bins]
- ret.append(result)
- df = pd.DataFrame(ret, columns=bins)
- return df
-
-
-[docs]def crps(targets, densities):
- '''
- Continuous Ranked Probability Score
-
- :param targets: a list with the target values
- :param densities: a list with pyFTS.probabilistic.ProbabilityDistribution objects
- :return: float
- '''
- _crps = float(0.0)
- if isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
- densities = [densities]
-
- l = len(densities[0].bins)
- n = len(densities)
- for ct, df in enumerate(densities):
- _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
-
- return _crps / float(l * n)
-
-
-[docs]def get_point_statistics(data, model, **kwargs):
- '''
- Consolidate all measures for point forecasters
-
- :param data: test data
- :param model: FTS model with point forecasting capability
- :param kwargs:
- :return: a list with the RMSE, MAPE and U Statistic
- '''
-
- steps_ahead = kwargs.get('steps_ahead', 1)
- kwargs['type'] = 'point'
-
- indexer = kwargs.get('indexer', None)
-
- if indexer is not None:
- ndata = np.array(indexer.get_data(data))
- elif model.is_multivariate:
- if not isinstance(data, pd.DataFrame):
- raise ValueError("Multivariate data must be a Pandas DataFrame!")
- ndata = data
- else:
- ndata = np.array(data)
-
- ret = list()
-
- if steps_ahead == 1:
- forecasts = model.predict(ndata, **kwargs)
-
- if model.is_multivariate and model.has_seasonality:
- ndata = model.indexer.get_data(ndata)
- elif model.is_multivariate:
- ndata = ndata[model.target_variable.data_label].values
-
- if not isinstance(forecasts, (list, np.ndarray)):
- forecasts = [forecasts]
-
- if len(forecasts) != len(ndata) - model.max_lag:
- forecasts = np.array(forecasts[:-1])
- else:
- forecasts = np.array(forecasts)
-
- ret.append(np.round(rmse(ndata[model.max_lag:], forecasts), 2))
- ret.append(np.round(mape(ndata[model.max_lag:], forecasts), 2))
- ret.append(np.round(UStatistic(ndata[model.max_lag:], forecasts), 2))
- else:
- steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1)
- nforecasts = []
- for k in np.arange(model.order, len(ndata) - steps_ahead, steps_ahead_sampler):
- sample = ndata[k - model.order: k]
- tmp = model.predict(sample, **kwargs)
- nforecasts.append(tmp[-1])
-
- start = model.max_lag + steps_ahead - 1
- ret.append(np.round(rmse(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
- ret.append(np.round(mape(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
- ret.append(np.round(UStatistic(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
-
- return ret
-
-
-[docs]def get_interval_statistics(data, model, **kwargs):
- """
- Consolidate all measures for interval forecasters
-
- :param data: test data
- :param model: FTS model with interval forecasting capability
- :param kwargs:
- :return: a list with the sharpness, resolution, coverage, .05 pinball mean,
- .25 pinball mean, .75 pinball mean and .95 pinball mean.
- """
-
- steps_ahead = kwargs.get('steps_ahead', 1)
- kwargs['type'] = 'interval'
-
- ret = list()
-
- if steps_ahead == 1:
- forecasts = model.predict(data, **kwargs)
- ret.append(round(sharpness(forecasts), 2))
- ret.append(round(resolution(forecasts), 2))
- ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.75, data[model.max_lag:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.95, data[model.max_lag:], forecasts[:-1]), 2))
- ret.append(round(winkler_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
- ret.append(round(winkler_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
- else:
- forecasts = []
- for k in np.arange(model.order, len(data) - steps_ahead):
- sample = data[k - model.order: k]
- tmp = model.predict(sample, **kwargs)
- forecasts.append(tmp[-1])
-
- start = model.max_lag + steps_ahead - 1
- ret.append(round(sharpness(forecasts), 2))
- ret.append(round(resolution(forecasts), 2))
- ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
- ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
- ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
- ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
- ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))
- ret.append(round(winkler_mean(0.05, data[start:], forecasts), 2))
- ret.append(round(winkler_mean(0.25, data[start:], forecasts), 2))
- return ret
-
-
-[docs]def get_distribution_statistics(data, model, **kwargs):
- """
- Get CRPS statistic and time for a forecasting model
-
- :param data: test data
- :param model: FTS model with probabilistic forecasting capability
- :param kwargs:
- :return: a list with the CRPS and execution time
- """
- steps_ahead = kwargs.get('steps_ahead', 1)
- kwargs['type'] = 'distribution'
-
- ret = list()
-
- if steps_ahead == 1:
- _s1 = time.time()
- forecasts = model.predict(data, **kwargs)
- _e1 = time.time()
- ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
- ret.append(round(_e1 - _s1, 3))
- ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
- else:
- skip = kwargs.get('steps_ahead_sampler', 1)
- forecasts = []
- _s1 = time.time()
- for k in np.arange(model.max_lag, len(data) - steps_ahead, skip):
- sample = data[k - model.max_lag: k]
- tmp = model.predict(sample, **kwargs)
- forecasts.append(tmp[-1])
- _e1 = time.time()
-
- start = model.max_lag + steps_ahead
- ret.append(round(crps(data[start:-1:skip], forecasts), 3))
- ret.append(round(_e1 - _s1, 3))
- ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
- return ret
-
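Taken together, the point metrics above can be computed directly on aligned target/forecast lists; a brief sketch with toy numbers (for a fitted model, get_point_statistics bundles the same metrics in one call):

from pyFTS.benchmarks import Measures

test = [10, 12, 13, 12, 15, 16, 18, 17, 19, 21]        # toy test series
forecasts = [11, 12, 14, 13, 14, 17, 17, 18, 20, 20]   # toy point forecasts aligned with test

print(Measures.rmse(test, forecasts))        # root mean squared error
print(Measures.mape(test, forecasts))        # mean absolute percentage error, in %
print(Measures.UStatistic(test, forecasts))  # Theil's U against the naive random-walk forecast

# rmse_, mape_, u_ = Measures.get_point_statistics(test, model)  # with `model` any fitted pyFTS point forecaster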
Source code for pyFTS.benchmarks.ResidualAnalysis
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-"""Residual Analysis methods"""
-
-import numpy as np
-import pandas as pd
-import matplotlib as plt
-import matplotlib.pyplot as plt
-from pyFTS.common import Transformations,Util
-from pyFTS.benchmarks import Measures
-from scipy import stats
-
-
-[docs]def residuals(targets, forecasts, order=1):
- """First order residuals"""
- return np.array(targets[order:]) - np.array(forecasts[:-1])
-
-
-[docs]def chi_squared(q, h):
- """
- Chi-Squared value
-
- :param q:
- :param h:
- :return:
- """
- p = stats.chi2.sf(q, h)
- return p
-
-
-[docs]def compare_residuals(data, models):
- """
- Compare residual's statistics of several models
-
- :param data: test data
- :param models:
- :return: a Pandas dataframe with the Box-Ljung statistic for each model
- """
- from statsmodels.stats.diagnostic import acorr_ljungbox
- rows = []
- columns = ["Model","Order","AVG","STD","Box-Ljung","p-value"]
- for mfts in models:
- forecasts = mfts.forecast(data)
- res = residuals(data, forecasts, mfts.order)
- mu = np.mean(res)
- sig = np.std(res)
- row = [mfts.shortname, mfts.order, mu, sig]
- stat, pval = acorr_ljungbox(res)
- row.extend([stat, pval])
- rows.append(row)
- return pd.DataFrame(rows, columns=columns)
-
-
-[docs]def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
- """
- Plot residuals and statistics
-
- :param targets:
- :param models:
- :param tam:
- :param save:
- :param file:
- :return:
- """
- fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
- for c, mfts in enumerate(models):
- if len(models) > 1:
- ax = axes[c]
- else:
- ax = axes
- forecasts = mfts.forecast(targets)
- res = residuals(targets,forecasts,mfts.order)
- mu = np.mean(res)
- sig = np.std(res)
-
- ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
- ax[0].set_ylabel('E')
- ax[0].set_xlabel('T')
- ax[0].plot(res)
-
- ax[1].set_title("Residuals Autocorrelation")
- ax[1].set_ylabel('ACS')
- ax[1].set_xlabel('Lag')
- ax[1].acorr(res)
-
- ax[2].set_title("Residuals Histogram")
- ax[2].set_ylabel('Freq')
- ax[2].set_xlabel('Bins')
- ax[2].hist(res)
-
- c += 1
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-[docs]def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
- fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
-
- for c, mfts in enumerate(models, start=0):
- if len(models) > 1:
- ax = axes[c]
- else:
- ax = axes
- forecasts = mfts.forecast(targets)
- res = residuals(targets, forecasts, mfts.order)
- mu = np.mean(res)
- sig = np.std(res)
-
- if c == 0: ax[0].set_title("Residuals", size='large')
- ax[0].set_ylabel(mfts.shortname, size='large')
- ax[0].set_xlabel(' ')
- ax[0].plot(res)
-
- if c == 0: ax[1].set_title("Residuals Autocorrelation", size='large')
- ax[1].set_ylabel('ACS')
- ax[1].set_xlabel('Lag')
- ax[1].acorr(res)
-
- if c == 0: ax[2].set_title("Residuals Histogram", size='large')
- ax[2].set_ylabel('Freq')
- ax[2].set_xlabel('Bins')
- ax[2].hist(res)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-[docs]def single_plot_residuals(targets, forecasts, order, tam=[8, 8], save=False, file=None):
- fig, ax = plt.subplots(nrows=1, ncols=3, figsize=tam)
-
- res = residuals(targets, forecasts, order)
-
- ax[0].set_title("Residuals", size='large')
- ax[0].set_ylabel("Model", size='large')
- ax[0].set_xlabel(' ')
- ax[0].plot(res)
-
- ax[1].set_title("Residuals Autocorrelation", size='large')
- ax[1].set_ylabel('ACS')
- ax[1].set_xlabel('Lag')
- ax[1].acorr(res)
-
- ax[2].set_title("Residuals Histogram", size='large')
- ax[2].set_ylabel('Freq')
- ax[2].set_xlabel('Bins')
- ax[2].hist(res)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
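A brief sketch of the residual checks above, assuming `models` is a list of fitted pyFTS models and `test` a held-out numeric series:

from pyFTS.benchmarks import ResidualAnalysis

summary = ResidualAnalysis.compare_residuals(test, models)   # mean, std and Ljung-Box statistic per model
print(summary)

ResidualAnalysis.plot_residuals(test, models, tam=[10, 8])   # residual series, autocorrelation and histogram panels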
Source code for pyFTS.benchmarks.Util
-"""
-Facilities for pyFTS Benchmark module
-"""
-
-import matplotlib as plt
-import matplotlib.cm as cmx
-import matplotlib.colors as pltcolors
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import sqlite3
-#from mpl_toolkits.mplot3d import Axes3D
-
-
-from copy import deepcopy
-from pyFTS.common import Util
-
-
-[docs]def open_benchmark_db(name):
- """
- Open a connection with a Sqlite database designed to store benchmark results.
-
- :param name: database filename
- :return: a sqlite3 database connection
- """
- conn = sqlite3.connect(name)
-
- #performance optimizations
- conn.execute("PRAGMA journal_mode = WAL")
- conn.execute("PRAGMA synchronous = NORMAL")
-
- create_benchmark_tables(conn)
- return conn
-
-
-[docs]def create_benchmark_tables(conn):
- """
- Create a sqlite3 table designed to store benchmark results.
-
- :param conn: a sqlite3 database connection
- """
- c = conn.cursor()
-
- c.execute('''CREATE TABLE if not exists benchmarks(
- ID integer primary key, Date int, Dataset text, Tag text,
- Type text, Model text, Transformation text, 'Order' int,
- Scheme text, Partitions int,
- Size int, Steps int, Method text, Measure text, Value real)''')
-
- conn.commit()
-
-
-[docs]def insert_benchmark(data, conn):
- """
- Insert benchmark data on database
-
- :param data: a tuple with the benchmark data with format:
-
- ID: integer incremental primary key
- Date: Date/hour of benchmark execution
- Dataset: Identify on which dataset the benchmark was performed
- Tag: a user defined word that identifies a benchmark set
- Type: forecasting type (point, interval, distribution)
- Model: FTS model
- Transformation: The name of data transformation, if one was used
- Order: the order of the FTS method
- Scheme: UoD partitioning scheme
- Partitions: Number of partitions
- Size: Number of rules of the FTS model
- Steps: prediction horizon, i. e., the number of steps ahead
- Measure: accuracy measure
- Value: the measure value
-
- :param conn: a sqlite3 database connection
- :return:
- """
- c = conn.cursor()
-
- c.execute("INSERT INTO benchmarks(Date, Dataset, Tag, Type, Model, "
- + "Transformation, 'Order', Scheme, Partitions, "
- + "Size, Steps, Method, Measure, Value) "
- + "VALUES(datetime('now'),?,?,?,?,?,?,?,?,?,?,?,?,?)", data)
- conn.commit()
-
-
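For example, the schema above can be exercised end-to-end; in this sketch the filename and the tuple values are purely illustrative and follow the field order documented in insert_benchmark (Date is filled in by SQL):

from pyFTS.benchmarks import Util

conn = Util.open_benchmark_db("benchmarks.db")   # creates the benchmarks table if needed

row = ("TAIEX", "demo", "point", "WHOFTS", None, 2,          # Dataset, Tag, Type, Model, Transformation, Order
       "GridPartitioner", 35, 120, 1, None, "rmse", 118.52)  # Scheme, Partitions, Size, Steps, Method, Measure, Value
Util.insert_benchmark(row, conn)

df = Util.get_dataframe_from_bd("benchmarks.db", "Measure = 'rmse'")
print(df.head())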
-[docs]def process_common_data(dataset, tag, type, job):
- """
- Wraps benchmark information on a tuple for sqlite database
-
- :param dataset: benchmark dataset
- :param tag: benchmark set alias
- :param type: forecasting type
- :param job: a dictionary with benchmark data
- :return: tuple for sqlite database
- """
- model = job["obj"]
- if model.benchmark_only:
- data = [dataset, tag, type, model.shortname,
- str(model.transformations[0]) if len(model.transformations) > 0 else None,
- model.order, None, None,
- None, job['steps'], job['method']]
- else:
- data = [dataset, tag, type, model.shortname,
- str(model.partitioner.transformation) if model.partitioner.transformation is not None else None,
- model.order, model.partitioner.name, str(model.partitioner.partitions),
- len(model), job['steps'], job['method']]
-
- return data
-
-
-[docs]def get_dataframe_from_bd(file, filter):
- """
- Query the sqlite benchmark database and return a pandas dataframe with the results
-
- :param file: the url of the benchmark database
- :param filter: sql conditions to filter
- :return: pandas dataframe with the query results
- """
- con = sqlite3.connect(file)
- sql = "SELECT * from benchmarks"
- if filter is not None:
- sql += " WHERE " + filter
- return pd.read_sql_query(sql, con)
-
-
-
-[docs]def extract_measure(dataframe, measure, data_columns):
- if not dataframe.empty:
- df = dataframe[(dataframe.Measure == measure)][data_columns]
- tmp = df.to_dict(orient="records")[0]
- ret = [k for k in tmp.values() if not np.isnan(k)]
- return ret
- else:
- return None
-
-
-[docs]def find_best(dataframe, criteria, ascending):
- models = dataframe.Model.unique()
- orders = dataframe.Order.unique()
- ret = {}
- for m in models:
- for o in orders:
- mod = {}
- df = dataframe[(dataframe.Model == m) & (dataframe.Order == o)].sort_values(by=criteria, ascending=ascending)
- if not df.empty:
- _key = str(m) + str(o)
- best = df.loc[df.index[0]]
- mod['Model'] = m
- mod['Order'] = o
- mod['Scheme'] = best["Scheme"]
- mod['Partitions'] = best["Partitions"]
-
- ret[_key] = mod
-
- return ret
-
-
-[docs]def analytic_tabular_dataframe(dataframe):
- experiments = len(dataframe.columns) - len(base_dataframe_columns()) - 1
- models = dataframe.Model.unique()
- orders = dataframe.Order.unique()
- schemes = dataframe.Scheme.unique()
- partitions = dataframe.Partitions.unique()
- steps = dataframe.Steps.unique()
- measures = dataframe.Measure.unique()
- data_columns = analytical_data_columns(experiments)
-
- ret = []
-
- for m in models:
- for o in orders:
- for s in schemes:
- for p in partitions:
- for st in steps:
- for ms in measures:
- df = dataframe[(dataframe.Model == m) & (dataframe.Order == o)
- & (dataframe.Scheme == s) & (dataframe.Partitions == p)
- & (dataframe.Steps == st) & (dataframe.Measure == ms) ]
-
- if not df.empty:
- for col in data_columns:
- mod = [m, o, s, p, st, ms, df[col].values[0]]
- ret.append(mod)
-
- dat = pd.DataFrame(ret, columns=tabular_dataframe_columns())
- return dat
-
-
-[docs]def tabular_dataframe_columns():
- return ["Model", "Order", "Scheme", "Partitions", "Steps", "Measure", "Value"]
-
-
-[docs]def base_dataframe_columns():
- return ["Model", "Order", "Scheme", "Partitions", "Size", "Steps", "Method"]
-
-[docs]def point_dataframe_synthetic_columns():
- columns = base_dataframe_columns()
- columns.extend(["RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG", "TIMESTD"])  # extend() mutates in place and returns None
- return columns
-
-
-[docs]def point_dataframe_analytic_columns(experiments):
- columns = [str(k) for k in np.arange(0, experiments)]
- columns.insert(0, "Model")
- columns.insert(1, "Order")
- columns.insert(2, "Scheme")
- columns.insert(3, "Partitions")
- columns.insert(4, "Size")
- columns.insert(5, "Steps")
- columns.insert(6, "Method")
- columns.insert(7, "Measure")
- return columns
-
-
-[docs]def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape, times, u, steps, method):
- """
- Create a dataframe to store the benchmark results
-
- :param experiments: dictionary with the execution results
- :param file:
- :param objs:
- :param rmse:
- :param save:
- :param synthetic:
- :param smape:
- :param times:
- :param u:
- :return:
- """
- ret = []
-
- if synthetic:
-
- for k in sorted(objs.keys()):
- try:
- mod = []
- mfts = objs[k]
- mod.append(mfts.shortname)
- mod.append(mfts.order)
- if not mfts.benchmark_only:
- mod.append(mfts.partitioner.name)
- mod.append(mfts.partitioner.partitions)
- mod.append(len(mfts))
- else:
- mod.append('-')
- mod.append('-')
- mod.append('-')
- mod.append(steps[k])
- mod.append(method[k])
- mod.append(np.round(np.nanmean(rmse[k]), 2))
- mod.append(np.round(np.nanstd(rmse[k]), 2))
- mod.append(np.round(np.nanmean(smape[k]), 2))
- mod.append(np.round(np.nanstd(smape[k]), 2))
- mod.append(np.round(np.nanmean(u[k]), 2))
- mod.append(np.round(np.nanstd(u[k]), 2))
- mod.append(np.round(np.nanmean(times[k]), 4))
- mod.append(np.round(np.nanstd(times[k]), 4))
- ret.append(mod)
- except Exception as ex:
- print("Erro ao salvar ", k)
- print("Exceção ", ex)
-
- columns = point_dataframe_synthetic_columns()
- else:
- for k in sorted(objs.keys()):
- try:
- mfts = objs[k]
- n = mfts.shortname
- o = mfts.order
- if not mfts.benchmark_only:
- s = mfts.partitioner.name
- p = mfts.partitioner.partitions
- l = len(mfts)
- else:
- s = '-'
- p = '-'
- l = '-'
- st = steps[k]
- mt = method[k]
- tmp = [n, o, s, p, l, st, mt, 'RMSE']
- tmp.extend(rmse[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'SMAPE']
- tmp.extend(smape[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'U']
- tmp.extend(u[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'TIME']
- tmp.extend(times[k])
- ret.append(deepcopy(tmp))
- except Exception as ex:
- print("Erro ao salvar ", k)
- print("Exceção ", ex)
- columns = point_dataframe_analytic_columns(experiments)
- try:
- dat = pd.DataFrame(ret, columns=columns)
- if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
- return dat
- except Exception as ex:
- print(ex)
- print(experiments)
- print(columns)
- print(ret)
-
-
-[docs]def cast_dataframe_to_synthetic(infile, outfile, experiments, type):
- if type == 'point':
- analytic_columns = point_dataframe_analytic_columns
- synthetic_columns = point_dataframe_synthetic_columns
- synthetize_measures = cast_dataframe_to_synthetic_point
- elif type == 'interval':
- analytic_columns = interval_dataframe_analytic_columns
- synthetic_columns = interval_dataframe_synthetic_columns
- synthetize_measures = cast_dataframe_to_synthetic_interval
- elif type == 'distribution':
- analytic_columns = probabilistic_dataframe_analytic_columns
- synthetic_columns = probabilistic_dataframe_synthetic_columns
- synthetize_measures = cast_dataframe_to_synthetic_probabilistic
- else:
- raise ValueError("Type parameter has an unknown value!")
-
- columns = analytic_columns(experiments)
- dat = pd.read_csv(infile, sep=";", usecols=columns)
- models = dat.Model.unique()
- orders = dat.Order.unique()
- schemes = dat.Scheme.unique()
- partitions = dat.Partitions.unique()
- steps = dat.Steps.unique()
- methods = dat.Method.unique()
-
- data_columns = analytical_data_columns(experiments)
-
- ret = []
-
- for m in models:
- for o in orders:
- for s in schemes:
- for p in partitions:
- for st in steps:
- for mt in methods:
- df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) &
- (dat.Partitions == p) & (dat.Steps == st) & (dat.Method == mt)]
- if not df.empty:
- mod = synthetize_measures(df, data_columns)
- mod.insert(0, m)
- mod.insert(1, o)
- mod.insert(2, s)
- mod.insert(3, p)
- mod.insert(4, df.iat[0,5])
- mod.insert(5, st)
- mod.insert(6, mt)
- ret.append(mod)
-
- dat = pd.DataFrame(ret, columns=synthetic_columns())
- dat.to_csv(outfile, sep=";", index=False)
-
-
-[docs]def cast_dataframe_to_synthetic_point(df, data_columns):
- ret = []
- rmse = extract_measure(df, 'RMSE', data_columns)
- smape = extract_measure(df, 'SMAPE', data_columns)
- u = extract_measure(df, 'U', data_columns)
- times = extract_measure(df, 'TIME', data_columns)
- ret.append(np.round(np.nanmean(rmse), 2))
- ret.append(np.round(np.nanstd(rmse), 2))
- ret.append(np.round(np.nanmean(smape), 2))
- ret.append(np.round(np.nanstd(smape), 2))
- ret.append(np.round(np.nanmean(u), 2))
- ret.append(np.round(np.nanstd(u), 2))
- ret.append(np.round(np.nanmean(times), 4))
- ret.append(np.round(np.nanstd(times), 4))
-
- return ret
-
-
-[docs]def analytical_data_columns(experiments):
- data_columns = [str(k) for k in np.arange(0, experiments)]
- return data_columns
-
-
-[docs]def scale_params(data):
- vmin = np.nanmin(data)
- vlen = np.nanmax(data) - vmin
- return (vmin, vlen)
-
-
-
-
-
-
-
-
-
-[docs]def unified_scaled_point(experiments, tam, save=False, file=None,
- sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
- sort_ascend=[1, 1, 1, 1],save_best=False,
- ignore=None, replace=None):
-
- fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
-
- axes[0].set_title('RMSE')
- axes[1].set_title('SMAPE')
- axes[2].set_title('U Statistic')
-
- models = {}
-
- for experiment in experiments:
-
- mdl = {}
-
- dat_syn = pd.read_csv(experiment[0], sep=";", usecols=point_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(experiment[1], sep=";", usecols=point_dataframe_analytic_columns(experiment[2]))
-
- rmse = []
- smape = []
- u = []
- times = []
-
- data_columns = analytical_data_columns(experiment[2])
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
-
- if b not in models:
- models[b] = {}
- models[b]['rmse'] = []
- models[b]['smape'] = []
- models[b]['u'] = []
- models[b]['times'] = []
-
- if b not in mdl:
- mdl[b] = {}
- mdl[b]['rmse'] = []
- mdl[b]['smape'] = []
- mdl[b]['u'] = []
- mdl[b]['times'] = []
-
- best = bests[b]
- tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- tmpl = extract_measure(tmp,'RMSE',data_columns)
- mdl[b]['rmse'].extend( tmpl )
- rmse.extend( tmpl )
- tmpl = extract_measure(tmp, 'SMAPE', data_columns)
- mdl[b]['smape'].extend(tmpl)
- smape.extend(tmpl)
- tmpl = extract_measure(tmp, 'U', data_columns)
- mdl[b]['u'].extend(tmpl)
- u.extend(tmpl)
- tmpl = extract_measure(tmp, 'TIME', data_columns)
- mdl[b]['times'].extend(tmpl)
- times.extend(tmpl)
-
- models[b]['label'] = check_replace_list(best["Model"] + " " + str(best["Order"]), replace)
-
- print("GLOBAL")
- rmse_param = scale_params(rmse)
- stats("rmse", rmse)
- smape_param = scale_params(smape)
- stats("smape", smape)
- u_param = scale_params(u)
- stats("u", u)
- times_param = scale_params(times)
-
- for key in sorted(models.keys()):
- models[key]['rmse'].extend( scale(mdl[key]['rmse'], rmse_param) )
- models[key]['smape'].extend( scale(mdl[key]['smape'], smape_param) )
- models[key]['u'].extend( scale(mdl[key]['u'], u_param) )
- models[key]['times'].extend( scale(mdl[key]['times'], times_param) )
-
- rmse = []
- smape = []
- u = []
- times = []
- labels = []
- for key in sorted(models.keys()):
- print(key)
- rmse.append(models[key]['rmse'])
- stats("rmse", models[key]['rmse'])
- smape.append(models[key]['smape'])
- stats("smape", models[key]['smape'])
- u.append(models[key]['u'])
- stats("u", models[key]['u'])
- times.append(models[key]['times'])
- labels.append(models[key]['label'])
-
- axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
- axes[0].set_title("RMSE")
- axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
- axes[1].set_title("SMAPE")
- axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
- axes[2].set_title("U Statistic")
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-[docs]def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
- sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
- sort_ascend=[1, 1, 1, 1],save_best=False,
- ignore=None,replace=None):
-
- fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
-
- axes[0].set_title('RMSE')
- axes[1].set_title('SMAPE')
- axes[2].set_title('U Statistic')
-
- dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))
-
- data_columns = analytical_data_columns(experiments)
-
- if save_best:
- dat = pd.DataFrame.from_dict(bests, orient='index')
- dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
-
- rmse = []
- smape = []
- u = []
- times = []
- labels = []
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
-
- best = bests[b]
- tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- rmse.append( extract_measure(tmp,'RMSE',data_columns) )
- smape.append(extract_measure(tmp, 'SMAPE', data_columns))
- u.append(extract_measure(tmp, 'U', data_columns))
- times.append(extract_measure(tmp, 'TIME', data_columns))
-
- labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]),replace))
-
- axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
- axes[0].set_title("RMSE")
- axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
- axes[1].set_title("SMAPE")
- axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
- axes[2].set_title("U Statistic")
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-
-[docs]def check_replace_list(m, replace):
- if replace is not None:
- for r in replace:
- if r[0] in m:
- return r[1]
- return m
-
-
-
-[docs]def check_ignore_list(b, ignore):
- flag = False
- if ignore is not None:
- for i in ignore:
- if i in b:
- flag = True
- return flag
-
-
-[docs]def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times,
- q05, q25, q75, q95, steps, method):
- ret = []
- if synthetic:
- for k in sorted(objs.keys()):
- mod = []
- mfts = objs[k]
- mod.append(mfts.shortname)
- mod.append(mfts.order)
- l = len(mfts)
- if not mfts.benchmark_only:
- mod.append(mfts.partitioner.name)
- mod.append(mfts.partitioner.partitions)
- mod.append(l)
- else:
- mod.append('-')
- mod.append('-')
- mod.append('-')
- mod.append(steps[k])
- mod.append(method[k])
- mod.append(round(np.nanmean(sharpness[k]), 2))
- mod.append(round(np.nanstd(sharpness[k]), 2))
- mod.append(round(np.nanmean(resolution[k]), 2))
- mod.append(round(np.nanstd(resolution[k]), 2))
- mod.append(round(np.nanmean(coverage[k]), 2))
- mod.append(round(np.nanstd(coverage[k]), 2))
- mod.append(round(np.nanmean(times[k]), 2))
- mod.append(round(np.nanstd(times[k]), 2))
- mod.append(round(np.nanmean(q05[k]), 2))
- mod.append(round(np.nanstd(q05[k]), 2))
- mod.append(round(np.nanmean(q25[k]), 2))
- mod.append(round(np.nanstd(q25[k]), 2))
- mod.append(round(np.nanmean(q75[k]), 2))
- mod.append(round(np.nanstd(q75[k]), 2))
- mod.append(round(np.nanmean(q95[k]), 2))
- mod.append(round(np.nanstd(q95[k]), 2))
- mod.append(l)
- ret.append(mod)
-
- columns = interval_dataframe_synthetic_columns()
- else:
- for k in sorted(objs.keys()):
- try:
- mfts = objs[k]
- n = mfts.shortname
- o = mfts.order
- if not mfts.benchmark_only:
- s = mfts.partitioner.name
- p = mfts.partitioner.partitions
- l = len(mfts)
- else:
- s = '-'
- p = '-'
- l = '-'
- st = steps[k]
- mt = method[k]
- tmp = [n, o, s, p, l, st, mt, 'Sharpness']
- tmp.extend(sharpness[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Resolution']
- tmp.extend(resolution[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Coverage']
- tmp.extend(coverage[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'TIME']
- tmp.extend(times[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Q05']
- tmp.extend(q05[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Q25']
- tmp.extend(q25[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Q75']
- tmp.extend(q75[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'Q95']
- tmp.extend(q95[k])
- ret.append(deepcopy(tmp))
- except Exception as ex:
- print("Erro ao salvar ", k)
- print("Exceção ", ex)
- columns = interval_dataframe_analytic_columns(experiments)
- dat = pd.DataFrame(ret, columns=columns)
- if save: dat.to_csv(Util.uniquefilename(file), sep=";")
- return dat
-
-
-[docs]def interval_dataframe_analytic_columns(experiments):
- columns = [str(k) for k in np.arange(0, experiments)]
- columns.insert(0, "Model")
- columns.insert(1, "Order")
- columns.insert(2, "Scheme")
- columns.insert(3, "Partitions")
- columns.insert(4, "Size")
- columns.insert(5, "Steps")
- columns.insert(6, "Method")
- columns.insert(7, "Measure")
- return columns
-
-
-
-[docs]def interval_dataframe_synthetic_columns():
- columns = ["Model", "Order", "Scheme", "Partitions","SIZE", "Steps","Method" "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
- "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD"]
- return columns
-
-
-[docs]def cast_dataframe_to_synthetic_interval(df, data_columns):
- sharpness = extract_measure(df, 'Sharpness', data_columns)
- resolution = extract_measure(df, 'Resolution', data_columns)
- coverage = extract_measure(df, 'Coverage', data_columns)
- times = extract_measure(df, 'TIME', data_columns)
- q05 = extract_measure(df, 'Q05', data_columns)
- q25 = extract_measure(df, 'Q25', data_columns)
- q75 = extract_measure(df, 'Q75', data_columns)
- q95 = extract_measure(df, 'Q95', data_columns)
- ret = []
- ret.append(np.round(np.nanmean(sharpness), 2))
- ret.append(np.round(np.nanstd(sharpness), 2))
- ret.append(np.round(np.nanmean(resolution), 2))
- ret.append(np.round(np.nanstd(resolution), 2))
- ret.append(np.round(np.nanmean(coverage), 2))
- ret.append(np.round(np.nanstd(coverage), 2))
- ret.append(np.round(np.nanmean(times), 4))
- ret.append(np.round(np.nanstd(times), 4))
- ret.append(np.round(np.nanmean(q05), 4))
- ret.append(np.round(np.nanstd(q05), 4))
- ret.append(np.round(np.nanmean(q25), 4))
- ret.append(np.round(np.nanstd(q25), 4))
- ret.append(np.round(np.nanmean(q75), 4))
- ret.append(np.round(np.nanstd(q75), 4))
- ret.append(np.round(np.nanmean(q95), 4))
- ret.append(np.round(np.nanstd(q95), 4))
- return ret
-
-
-
-
-[docs]def unified_scaled_interval(experiments, tam, save=False, file=None,
- sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
- sort_ascend=[True, False, True, True],save_best=False,
- ignore=None, replace=None):
- fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
-
- axes[0].set_title('Sharpness')
- axes[1].set_title('Resolution')
- axes[2].set_title('Coverage')
-
- models = {}
-
- for experiment in experiments:
-
- mdl = {}
-
- dat_syn = pd.read_csv(experiment[0], sep=";", usecols=interval_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(experiment[1], sep=";", usecols=interval_dataframe_analytic_columns(experiment[2]))
-
- sharpness = []
- resolution = []
- coverage = []
- times = []
-
- data_columns = analytical_data_columns(experiment[2])
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
-
- if b not in models:
- models[b] = {}
- models[b]['sharpness'] = []
- models[b]['resolution'] = []
- models[b]['coverage'] = []
- models[b]['times'] = []
-
- if b not in mdl:
- mdl[b] = {}
- mdl[b]['sharpness'] = []
- mdl[b]['resolution'] = []
- mdl[b]['coverage'] = []
- mdl[b]['times'] = []
-
- best = bests[b]
- print(best)
- tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- tmpl = extract_measure(tmp, 'Sharpness', data_columns)
- mdl[b]['sharpness'].extend(tmpl)
- sharpness.extend(tmpl)
- tmpl = extract_measure(tmp, 'Resolution', data_columns)
- mdl[b]['resolution'].extend(tmpl)
- resolution.extend(tmpl)
- tmpl = extract_measure(tmp, 'Coverage', data_columns)
- mdl[b]['coverage'].extend(tmpl)
- coverage.extend(tmpl)
- tmpl = extract_measure(tmp, 'TIME', data_columns)
- mdl[b]['times'].extend(tmpl)
- times.extend(tmpl)
-
- models[b]['label'] = check_replace_list(best["Model"] + " " + str(best["Order"]), replace)
-
- sharpness_param = scale_params(sharpness)
- resolution_param = scale_params(resolution)
- coverage_param = scale_params(coverage)
- times_param = scale_params(times)
-
- for key in sorted(models.keys()):
- models[key]['sharpness'].extend(scale(mdl[key]['sharpness'], sharpness_param))
- models[key]['resolution'].extend(scale(mdl[key]['resolution'], resolution_param))
- models[key]['coverage'].extend(scale(mdl[key]['coverage'], coverage_param))
- models[key]['times'].extend(scale(mdl[key]['times'], times_param))
-
- sharpness = []
- resolution = []
- coverage = []
- times = []
- labels = []
- for key in sorted(models.keys()):
- sharpness.append(models[key]['sharpness'])
- resolution.append(models[key]['resolution'])
- coverage.append(models[key]['coverage'])
- times.append(models[key]['times'])
- labels.append(models[key]['label'])
-
- axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
- axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
- axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-
-[docs]def plot_dataframe_interval(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
- sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
- sort_ascend=[True, False, True, True],save_best=False,
- ignore=None, replace=None):
-
- fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
-
- axes[0].set_title('Sharpness')
- axes[1].set_title('Resolution')
- axes[2].set_title('Coverage')
-
- dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
-
- data_columns = analytical_data_columns(experiments)
-
- if save_best:
- dat = pd.DataFrame.from_dict(bests, orient='index')
- dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
-
- sharpness = []
- resolution = []
- coverage = []
- times = []
- labels = []
- bounds_shp = []
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
- best = bests[b]
- df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- sharpness.append( extract_measure(df,'Sharpness',data_columns) )
- resolution.append(extract_measure(df, 'Resolution', data_columns))
- coverage.append(extract_measure(df, 'Coverage', data_columns))
- times.append(extract_measure(df, 'TIME', data_columns))
- labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
-
- axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
- axes[0].set_title("Sharpness")
- axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
- axes[1].set_title("Resolution")
- axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
- axes[2].set_title("Coverage")
- axes[2].set_ylim([0, 1.1])
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-
-[docs]def unified_scaled_interval_pinball(experiments, tam, save=False, file=None,
- sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
- sort_ascend=[True, False, True, True], save_best=False,
- ignore=None, replace=None):
- fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
- axes[0].set_title(r'$\tau=0.05$')
- axes[1].set_title(r'$\tau=0.25$')
- axes[2].set_title(r'$\tau=0.75$')
- axes[3].set_title(r'$\tau=0.95$')
- models = {}
-
- for experiment in experiments:
-
- mdl = {}
-
- dat_syn = pd.read_csv(experiment[0], sep=";", usecols=interval_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(experiment[1], sep=";", usecols=interval_dataframe_analytic_columns(experiment[2]))
-
- q05 = []
- q25 = []
- q75 = []
- q95 = []
-
- data_columns = analytical_data_columns(experiment[2])
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
-
- if b not in models:
- models[b] = {}
- models[b]['q05'] = []
- models[b]['q25'] = []
- models[b]['q75'] = []
- models[b]['q95'] = []
-
- if b not in mdl:
- mdl[b] = {}
- mdl[b]['q05'] = []
- mdl[b]['q25'] = []
- mdl[b]['q75'] = []
- mdl[b]['q95'] = []
-
- best = bests[b]
- print(best)
- tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- tmpl = extract_measure(tmp, 'Q05', data_columns)
- mdl[b]['q05'].extend(tmpl)
- q05.extend(tmpl)
- tmpl = extract_measure(tmp, 'Q25', data_columns)
- mdl[b]['q25'].extend(tmpl)
- q25.extend(tmpl)
- tmpl = extract_measure(tmp, 'Q75', data_columns)
- mdl[b]['q75'].extend(tmpl)
- q75.extend(tmpl)
- tmpl = extract_measure(tmp, 'Q95', data_columns)
- mdl[b]['q95'].extend(tmpl)
- q95.extend(tmpl)
-
- models[b]['label'] = check_replace_list(best["Model"] + " " + str(best["Order"]), replace)
-
- q05_param = scale_params(q05)
- q25_param = scale_params(q25)
- q75_param = scale_params(q75)
- q95_param = scale_params(q95)
-
- for key in sorted(models.keys()):
- models[key]['q05'].extend(scale(mdl[key]['q05'], q05_param))
- models[key]['q25'].extend(scale(mdl[key]['q25'], q25_param))
- models[key]['q75'].extend(scale(mdl[key]['q75'], q75_param))
- models[key]['q95'].extend(scale(mdl[key]['q95'], q95_param))
-
- q05 = []
- q25 = []
- q75 = []
- q95 = []
- labels = []
- for key in sorted(models.keys()):
- q05.append(models[key]['q05'])
- q25.append(models[key]['q25'])
- q75.append(models[key]['q75'])
- q95.append(models[key]['q95'])
- labels.append(models[key]['label'])
-
- axes[0].boxplot(q05, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[1].boxplot(q25, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[2].boxplot(q75, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[3].boxplot(q95, labels=labels, vert=False, autorange=True, showmeans=True)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-
-[docs]def plot_dataframe_interval_pinball(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
- sort_columns=['COVAVG','SHARPAVG','COVSTD','SHARPSTD'],
- sort_ascend=[True, False, True, True], save_best=False,
- ignore=None, replace=None):
-
- fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
- axes[0].set_title(r'$\tau=0.05$')
- axes[1].set_title(r'$\tau=0.25$')
- axes[2].set_title(r'$\tau=0.75$')
- axes[3].set_title(r'$\tau=0.95$')
-
- dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=interval_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(file_analytic, sep=";", usecols=interval_dataframe_analytic_columns(experiments))
-
- data_columns = analytical_data_columns(experiments)
-
- if save_best:
- dat = pd.DataFrame.from_dict(bests, orient='index')
- dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
-
- q05 = []
- q25 = []
- q75 = []
- q95 = []
- labels = []
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
- best = bests[b]
- df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- q05.append(extract_measure(df, 'Q05', data_columns))
- q25.append(extract_measure(df, 'Q25', data_columns))
- q75.append(extract_measure(df, 'Q75', data_columns))
- q95.append(extract_measure(df, 'Q95', data_columns))
- labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
-
- axes[0].boxplot(q05, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[1].boxplot(q25, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[2].boxplot(q75, labels=labels, vert=False, autorange=True, showmeans=True)
- axes[3].boxplot(q95, labels=labels, vert=False, autorange=True, showmeans=True)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-[docs]def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, synthetic, steps, method):
- """
- Save benchmark results for m-step ahead probabilistic forecasters
-    :param experiments:
-    :param file:
-    :param objs:
-    :param crps:
-    :param times:
-    :param save:
-    :param synthetic:
-    :param steps:
-    :param method:
-    :return:
- """
- ret = []
-
- if synthetic:
-
-        for k in sorted(objs.keys()):
-            try:
-                mod = []
-                mfts = objs[k]
-                mod.append(mfts.shortname)
-                mod.append(mfts.order)
-                if not mfts.benchmark_only:
-                    mod.append(mfts.partitioner.name)
-                    mod.append(mfts.partitioner.partitions)
-                    mod.append(len(mfts))
-                else:
-                    mod.append('-')
-                    mod.append('-')
-                    mod.append('-')
-                mod.append(steps[k])
-                mod.append(method[k])
-                mod.append(np.round(np.nanmean(crps[k]), 2))
-                mod.append(np.round(np.nanstd(crps[k]), 2))
-                mod.append(np.round(np.nanmean(times[k]), 4))
-                mod.append(np.round(np.nanstd(times[k]), 4))
-                ret.append(mod)
-            except Exception as ex:
-                print("Error while saving ", k)
-                print("Exception ", ex)
-
- columns = probabilistic_dataframe_synthetic_columns()
- else:
- for k in sorted(objs.keys()):
- try:
- mfts = objs[k]
- n = mfts.shortname
- o = mfts.order
- if not mfts.benchmark_only:
- s = mfts.partitioner.name
- p = mfts.partitioner.partitions
- l = len(mfts)
- else:
- s = '-'
- p = '-'
- l = '-'
- st = steps[k]
- mt = method[k]
- tmp = [n, o, s, p, l, st, mt, 'CRPS']
- tmp.extend(crps[k])
- ret.append(deepcopy(tmp))
- tmp = [n, o, s, p, l, st, mt, 'TIME']
- tmp.extend(times[k])
- ret.append(deepcopy(tmp))
- except Exception as ex:
- print("Erro ao salvar ", k)
- print("Exceção ", ex)
- columns = probabilistic_dataframe_analytic_columns(experiments)
- dat = pd.DataFrame(ret, columns=columns)
- if save: dat.to_csv(Util.uniquefilename(file), sep=";")
- return dat
-
-
-[docs]def probabilistic_dataframe_analytic_columns(experiments):
- columns = [str(k) for k in np.arange(0, experiments)]
- columns.insert(0, "Model")
- columns.insert(1, "Order")
- columns.insert(2, "Scheme")
- columns.insert(3, "Partitions")
- columns.insert(4, "Size")
- columns.insert(5, "Steps")
- columns.insert(6, "Method")
- columns.insert(7, "Measure")
- return columns
-
-
-[docs]def probabilistic_dataframe_synthetic_columns():
- columns = ["Model", "Order", "Scheme", "Partitions","Size", "Steps", "Method", "CRPSAVG", "CRPSSTD",
- "TIMEAVG", "TIMESTD"]
- return columns
-
-
-[docs]def cast_dataframe_to_synthetic_probabilistic(df, data_columns):
- crps1 = extract_measure(df, 'CRPS', data_columns)
- times1 = extract_measure(df, 'TIME', data_columns)
- ret = []
- ret.append(np.round(np.nanmean(crps1), 2))
- ret.append(np.round(np.nanstd(crps1), 2))
- ret.append(np.round(np.nanmean(times1), 2))
- ret.append(np.round(np.nanstd(times1), 2))
- return ret
-
-
-[docs]def unified_scaled_probabilistic(experiments, tam, save=False, file=None,
- sort_columns=['CRPSAVG', 'CRPSSTD'],
- sort_ascend=[True, True], save_best=False,
- ignore=None, replace=None):
-    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
-
-    axes[0].set_title('CRPS Interval Ahead')
-    axes[1].set_title('CRPS Distribution Ahead')
-
- models = {}
-
- for experiment in experiments:
-
- print(experiment)
-
- mdl = {}
-
- dat_syn = pd.read_csv(experiment[0], sep=";", usecols=probabilistic_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(experiment[1], sep=";", usecols=probabilistic_dataframe_analytic_columns(experiment[2]))
-
- crps1 = []
- crps2 = []
-
- data_columns = analytical_data_columns(experiment[2])
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
-
- if b not in models:
- models[b] = {}
- models[b]['crps1'] = []
- models[b]['crps2'] = []
-
- if b not in mdl:
- mdl[b] = {}
- mdl[b]['crps1'] = []
- mdl[b]['crps2'] = []
-
- best = bests[b]
-
- print(best)
-
- tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- tmpl = extract_measure(tmp, 'CRPS_Interval', data_columns)
- mdl[b]['crps1'].extend(tmpl)
- crps1.extend(tmpl)
- tmpl = extract_measure(tmp, 'CRPS_Distribution', data_columns)
- mdl[b]['crps2'].extend(tmpl)
- crps2.extend(tmpl)
-
- models[b]['label'] = check_replace_list(best["Model"] + " " + str(best["Order"]), replace)
-
- crps1_param = scale_params(crps1)
- crps2_param = scale_params(crps2)
-
- for key in sorted(mdl.keys()):
- print(key)
- models[key]['crps1'].extend(scale(mdl[key]['crps1'], crps1_param))
- models[key]['crps2'].extend(scale(mdl[key]['crps2'], crps2_param))
-
- crps1 = []
- crps2 = []
- labels = []
- for key in sorted(models.keys()):
- crps1.append(models[key]['crps1'])
- crps2.append(models[key]['crps2'])
- labels.append(models[key]['label'])
-
- axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
- axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-
-[docs]def plot_dataframe_probabilistic(file_synthetic, file_analytic, experiments, tam, save=False, file=None,
- sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
- sort_ascend=[True, True, True, True], save_best=False,
- ignore=None, replace=None):
-
- fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
-
- axes[0].set_title('CRPS')
- axes[1].set_title('CRPS')
-
- dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=probabilistic_dataframe_synthetic_columns())
-
- bests = find_best(dat_syn, sort_columns, sort_ascend)
-
- dat_ana = pd.read_csv(file_analytic, sep=";", usecols=probabilistic_dataframe_analytic_columns(experiments))
-
- data_columns = analytical_data_columns(experiments)
-
- if save_best:
- dat = pd.DataFrame.from_dict(bests, orient='index')
- dat.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic","best")), sep=";", index=False)
-
- crps1 = []
- crps2 = []
- labels = []
-
- for b in sorted(bests.keys()):
- if check_ignore_list(b, ignore):
- continue
- best = bests[b]
- df = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
- & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
- crps1.append( extract_measure(df,'CRPS_Interval',data_columns) )
- crps2.append(extract_measure(df, 'CRPS_Distribution', data_columns))
- labels.append(check_replace_list(best["Model"] + " " + str(best["Order"]), replace))
-
- axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
- axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
-
- plt.tight_layout()
- Util.show_and_save_image(fig, file, save)
-
-
Source code for pyFTS.benchmarks.arima
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-import numpy as np
-import pandas as pd
-from statsmodels.tsa.arima_model import ARIMA as stats_arima
-import scipy.stats as st
-from pyFTS.common import SortedCollection, fts
-from pyFTS.probabilistic import ProbabilityDistribution
-
-
-[docs]class ARIMA(fts.FTS):
- """
- Façade for statsmodels.tsa.arima_model
- """
- def __init__(self, **kwargs):
- super(ARIMA, self).__init__(**kwargs)
- self.name = "ARIMA"
- self.detail = "Auto Regressive Integrated Moving Average"
- self.is_high_order = True
- self.has_point_forecasting = True
- self.has_interval_forecasting = True
- self.has_probability_forecasting = True
- self.model = None
- self.model_fit = None
- self.trained_data = None
- self.p = 1
- self.d = 0
- self.q = 0
- self.benchmark_only = True
- self.min_order = 1
- self.alpha = kwargs.get("alpha", 0.05)
- self.order = kwargs.get("order", (1,0,0))
- self._decompose_order(self.order)
-
- def _decompose_order(self, order):
- if isinstance(order, (tuple, set, list)):
- self.p = order[0]
- self.d = order[1]
- self.q = order[2]
- self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
- self.max_lag = self.order
- self.d = len(self.transformations)
- self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
-
-[docs] def train(self, data, **kwargs):
-
- if 'order' in kwargs:
- order = kwargs.pop('order')
- self._decompose_order(order)
-
- if self.indexer is not None:
- data = self.indexer.get_data(data)
-
- try:
- self.model = stats_arima(data, order=(self.p, self.d, self.q))
- self.model_fit = self.model.fit(disp=0)
- except Exception as ex:
- print(ex)
- self.model_fit = None
-
-
-
-
-
-[docs] def forecast(self, ndata, **kwargs):
- if self.model_fit is None:
- return np.nan
-
- ndata = np.array(ndata)
-
- l = len(ndata)
-
- ret = []
-
- ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags
-
- if self.q > 0:
- residuals = ndata[self.p-1:] - ar
-
- ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals) + 1)])
-
- ret = ar[self.q - 1:] + ma
- ret = ret[self.q:]
- else:
- ret = ar
-
-        #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
-
- return ret
-
-[docs] def forecast_interval(self, data, **kwargs):
-
- if self.model_fit is None:
- return np.nan
-
- sigma = np.sqrt(self.model_fit.sigma2)
-
- l = len(data)
-
- ret = []
-
- for k in np.arange(self.order, l+1):
- tmp = []
-
- sample = [data[i] for i in np.arange(k - self.order, k)]
-
- mean = self.forecast(sample)
-
- if isinstance(mean,(list, np.ndarray)):
- mean = mean[0]
-
- tmp.append(mean + st.norm.ppf(self.alpha) * sigma)
- tmp.append(mean + st.norm.ppf(1 - self.alpha) * sigma)
-
- ret.append(tmp)
-
- return ret
-
-[docs] def forecast_ahead_interval(self, ndata, steps, **kwargs):
- if self.model_fit is None:
- return np.nan
-
- smoothing = kwargs.get("smoothing",0.5)
-
- sigma = np.sqrt(self.model_fit.sigma2)
-
- l = len(ndata)
-
- nmeans = self.forecast_ahead(ndata, steps, **kwargs)
-
- ret = []
-
- for k in np.arange(0, steps):
- tmp = []
-
- hsigma = (1 + k*smoothing)*sigma
-
- tmp.append(nmeans[k] + st.norm.ppf(self.alpha) * hsigma)
- tmp.append(nmeans[k] + st.norm.ppf(1 - self.alpha) * hsigma)
-
- ret.append(tmp)
-
- return ret
-
-[docs] def forecast_distribution(self, data, **kwargs):
-
- sigma = np.sqrt(self.model_fit.sigma2)
-
- l = len(data)
-
- ret = []
-
- for k in np.arange(self.order, l + 1):
- sample = [data[i] for i in np.arange(k - self.order, k)]
-
- mean = self.forecast(sample)
-
- if isinstance(mean, (list, np.ndarray)):
- mean = mean[0]
-
- dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[self.original_min, self.original_max])
- intervals = []
- for alpha in np.arange(0.05, 0.5, 0.05):
-
- qt1 = mean + st.norm.ppf(alpha) * sigma
- qt2 = mean + st.norm.ppf(1 - alpha) * sigma
-
- intervals.append([qt1, qt2])
-
- dist.append_interval(intervals)
-
- ret.append(dist)
-
- return ret
-
-
-[docs] def forecast_ahead_distribution(self, data, steps, **kwargs):
- smoothing = kwargs.get("smoothing", 0.5)
-
- sigma = np.sqrt(self.model_fit.sigma2)
-
- l = len(data)
-
- ret = []
-
- nmeans = self.forecast_ahead(data, steps, **kwargs)
-
- for k in np.arange(0, steps):
- dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
- uod=[self.original_min, self.original_max])
- intervals = []
- for alpha in np.arange(0.05, 0.5, 0.05):
- tmp = []
-
- hsigma = (1 + k * smoothing) * sigma
-
- tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma)
- tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma)
-
- intervals.append(tmp)
-
- dist.append_interval(intervals)
-
- ret.append(dist)
-
- return ret
-
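A minimal usage sketch of the ARIMA façade above. The synthetic series, the (2, 0, 1) order and the 0.05 significance level are illustrative assumptions; fit() is the generic entry point inherited from fts.FTS and delegates to the train() method shown above.

import numpy as np
from pyFTS.benchmarks import arima

# illustrative synthetic series standing in for real data
data = [np.sin(k / 10) * 50 + 100 for k in range(600)]
train, test = data[:500], data[500:]

model = arima.ARIMA(order=(2, 0, 1), alpha=0.05)  # p, d, q and interval significance
model.fit(train)                                  # wraps the statsmodels ARIMA fitting

point = model.forecast(test)                      # one-step-ahead point forecasts
interval = model.forecast_interval(test)          # [lower, upper] bounds per forecast
ahead = model.forecast_ahead_interval(test, 5)    # intervals widened at each horizon step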
Source code for pyFTS.benchmarks.benchmarks
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-"""Benchmarks methods for FTS methods"""
-
-
-import datetime
-import time
-from copy import deepcopy
-import traceback
-
-import matplotlib as plt
-
-import matplotlib.pyplot as plt
-import numpy as np
-from mpl_toolkits.mplot3d import Axes3D
-
-from pyFTS.common import Transformations
-from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
-from pyFTS.models.ensemble import ensemble
-from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg, knn
-from pyFTS.benchmarks import Util as bUtil
-from pyFTS.common import Util as cUtil
-# from sklearn.cross_validation import KFold
-from pyFTS.partitioners import Grid
-
-colors = ['grey', 'darkgrey', 'rosybrown', 'maroon', 'red','orange', 'gold', 'yellow', 'olive', 'green',
- 'darkgreen', 'cyan', 'lightblue','blue', 'darkblue', 'purple', 'darkviolet' ]
-
-ncol = len(colors)
-
-styles = ['-','--','-.',':','.']
-
-nsty = len(styles)
-
-
-def __pop(key, default, kwargs):
- if key in kwargs:
- return kwargs.pop(key)
- else:
- return default
-
-
-[docs]def get_benchmark_point_methods():
- """Return all non FTS methods for point forecasting"""
- return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
-
-
-[docs]def get_point_methods():
- """Return all FTS methods for point forecasting"""
- return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
- cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS,
- hofts.HighOrderFTS, hofts.WeightedHighOrderFTS, hwang.HighOrderFTS,
- pwfts.ProbabilisticWeightedFTS]
-
-
-[docs]def get_point_multivariate_methods():
- """Return all multivariate FTS methods por point forecasting"""
- return [mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS]
-
-
-[docs]def get_benchmark_interval_methods():
- """Return all non FTS methods for point_to_interval forecasting"""
- return [ arima.ARIMA, quantreg.QuantileRegression]
-
-
-[docs]def get_interval_methods():
- """Return all FTS methods for point_to_interval forecasting"""
- return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
-
-
-[docs]def get_probabilistic_methods():
- """Return all FTS methods for probabilistic forecasting"""
- return [ensemble.AllMethodEnsembleFTS, pwfts.ProbabilisticWeightedFTS]
-
-
-[docs]def get_benchmark_probabilistic_methods():
- """Return all FTS methods for probabilistic forecasting"""
- return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
-
-
-[docs]def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
- """
- Sliding window benchmarks for FTS forecasters.
-
-    For each data window, the data is split into training and test sets. For each training split, a partitioner
-    model is created for each combination of number of partitions and partitioning method, and a forecasting
-    model is trained for each combination of partitioner, order, steps ahead and FTS method.
-
- Then all trained models are benchmarked on the test data and the metrics are stored on a sqlite3 database
- (identified by the 'file' parameter) for posterior analysis.
-
-    This whole process can be distributed over a dispy cluster by setting the 'distributed' attribute to True and
-    informing the list of dispy nodes in the 'nodes' parameter.
-
- The number of experiments is determined by 'windowsize' and 'inc' parameters.
-
-    :param data: time series data used in the benchmarks
- :param windowsize: size of sliding window
-    :param train: percentage of the sliding window data used to train the models
- :param kwargs: dict, optional arguments
-
- :keyword benchmark_methods: a list with Non FTS models to benchmark. The default is None.
- :keyword benchmark_methods_parameters: a list with Non FTS models parameters. The default is None.
- :keyword benchmark_models: A boolean value indicating if external FTS methods will be used on benchmark. The default is False.
- :keyword build_methods: A boolean value indicating if the default FTS methods will be used on benchmark. The default is True.
- :keyword dataset: the dataset name to identify the current set of benchmarks results on database.
-    :keyword distributed: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
- :keyword file: file path to save the results. The default is benchmarks.db.
- :keyword inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
- :keyword methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
- :keyword models: a list with prebuilt FTS objects. The default is None.
- :keyword nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
- :keyword orders: a list with orders of the models (for high order models). The default is [1,2,3].
- :keyword partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
- :keyword partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
- :keyword partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
- :keyword progress: If true a progress bar will be displayed during the benchmarks. The default is False.
- :keyword start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
- :keyword steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
- :keyword tag: a name to identify the current set of benchmarks results on database.
- :keyword type: the forecasting type, one of these values: point(default), interval or distribution. The default is point.
-    :keyword transformations: a list with data transformations to apply. The default is [None].
- """
-
- tag = __pop('tag', None, kwargs)
- dataset = __pop('dataset', None, kwargs)
-
- distributed = __pop('distributed', False, kwargs)
-
- transformations = kwargs.get('transformations', [None])
- progress = kwargs.get('progress', None)
- type = kwargs.get("type", 'point')
-
- orders = __pop("orders", [1,2,3], kwargs)
-
- partitioners_models = __pop("partitioners_models", None, kwargs)
- partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
- partitions = __pop("partitions", [10], kwargs)
-
- steps_ahead = __pop('steps_ahead', [1], kwargs)
-
- methods = __pop('methods', None, kwargs)
-
- models = __pop('models', None, kwargs)
-
- pool = [] if models is None else models
-
- if methods is None:
- if type == 'point':
- methods = get_point_methods()
- elif type == 'interval':
- methods = get_interval_methods()
- elif type == 'distribution':
- methods = get_probabilistic_methods()
-
- build_methods = __pop("build_methods", True, kwargs)
-
- if build_methods:
- for method in methods:
- mfts = method()
-
- if mfts.is_high_order:
- for order in orders:
- if order >= mfts.min_order:
- mfts = method()
- mfts.order = order
- pool.append(mfts)
- else:
- mfts.order = 1
- pool.append(mfts)
-
- benchmark_models = __pop("benchmark_models", False, kwargs)
-
- if benchmark_models != False:
-
- benchmark_methods = __pop("benchmark_methods", None, kwargs)
- benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
-
- benchmark_pool = [] if ( benchmark_models is None or not isinstance(benchmark_models, list)) \
- else benchmark_models
-
- if benchmark_models is None and benchmark_methods is None:
-            if type == 'point' or type == 'partition':
- benchmark_methods = get_benchmark_point_methods()
- elif type == 'interval':
- benchmark_methods = get_benchmark_interval_methods()
- elif type == 'distribution':
- benchmark_methods = get_benchmark_probabilistic_methods()
-
- if benchmark_methods is not None:
- for transformation in transformations:
- for count, model in enumerate(benchmark_methods, start=0):
- par = benchmark_methods_parameters[count]
- mfts = model(**par)
- mfts.append_transformation(transformation)
- benchmark_pool.append(mfts)
-
- if type == 'point':
- experiment_method = run_point
- synthesis_method = process_point_jobs
- elif type == 'interval':
- experiment_method = run_interval
- synthesis_method = process_interval_jobs
- elif type == 'distribution':
- experiment_method = run_probabilistic
- synthesis_method = process_probabilistic_jobs
- else:
- raise ValueError("Type parameter has a unkown value!")
-
- if distributed:
- import pyFTS.distributed.dispy as dispy
-
- nodes = kwargs.get("nodes", ['127.0.0.1'])
- cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
-
- jobs = []
-
- inc = __pop("inc", 0.1, kwargs)
-
- if progress:
- from tqdm import tqdm
- _tdata = len(data) / (windowsize * inc)
- _tasks = (len(partitioners_models) * len(orders) * len(partitions) * len(transformations) * len(steps_ahead))
- _tbcmk = len(benchmark_pool)*len(steps_ahead)
- progressbar = tqdm(total=_tdata*_tasks + _tdata*_tbcmk, desc="Benchmarks:")
-
- file = kwargs.get('file', "benchmarks.db")
-
- conn = bUtil.open_benchmark_db(file)
-
- for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
- if benchmark_models != False:
- for model in benchmark_pool:
- for step in steps_ahead:
-
- kwargs['steps_ahead'] = step
-
- if not distributed:
- if progress:
- progressbar.update(1)
- try:
- job = experiment_method(deepcopy(model), None, train, test, **kwargs)
- synthesis_method(dataset, tag, job, conn)
- except Exception as ex:
- print('EXCEPTION! ', model.shortname, model.order)
- traceback.print_exc()
- else:
- job = cluster.submit(deepcopy(model), None, train, test, **kwargs)
- jobs.append(job)
-
- partitioners_pool = []
-
- if partitioners_models is None:
-
- for transformation in transformations:
-
- for partition in partitions:
-
- for partitioner in partitioners_methods:
-
- data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
-
- partitioners_pool.append(data_train_fs)
- else:
- partitioners_pool = partitioners_models
-
- for step in steps_ahead:
-
- for partitioner in partitioners_pool:
-
- for _id, model in enumerate(pool,start=0):
-
- kwargs['steps_ahead'] = step
-
- if not distributed:
- if progress:
- progressbar.update(1)
- try:
- job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
- synthesis_method(dataset, tag, job, conn)
- except Exception as ex:
- print('EXCEPTION! ',model.shortname, model.order, partitioner.name,
- partitioner.partitions, str(partitioner.transformation))
- traceback.print_exc()
- else:
- job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
-                        job.id = _id  # associate an ID to identify jobs (if needed later)
- jobs.append(job)
-
- if progress:
- progressbar.close()
-
- if distributed:
-
- for job in jobs:
- if progress:
- progressbar.update(1)
- job()
- if job.status == dispy.dispy.DispyJob.Finished and job is not None:
- tmp = job.result
- synthesis_method(dataset, tag, tmp, conn)
- else:
- print("status",job.status)
- print("result",job.result)
- print("stdout",job.stdout)
- print("stderr",job.exception)
-
- cluster.wait() # wait for all jobs to finish
-
- dispy.stop_dispy_cluster(cluster, http_server)
-
- conn.close()
-
-
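As a usage reference for sliding_window_benchmarks above, the call below exercises the keyword arguments documented in its docstring. The synthetic data, window sizes, file name and tags are illustrative assumptions, not library defaults.

import numpy as np
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.partitioners import Grid
from pyFTS.models import chen, hofts

# illustrative synthetic series standing in for a real benchmark dataset
data = [np.sin(k / 10) * 50 + 100 for k in range(1000)]

bchmk.sliding_window_benchmarks(
    data, windowsize=300, train=0.8, inc=0.2,
    methods=[chen.ConventionalFTS, hofts.HighOrderFTS],
    orders=[1, 2],
    partitions=[10, 20],
    partitioners_methods=[Grid.GridPartitioner],
    type='point',
    steps_ahead=[1],
    file='benchmarks.db', dataset='synthetic', tag='example',
    progress=False, distributed=False)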
-
-
-[docs]def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
- """
- Run the point forecasting benchmarks
-
- :param mfts: FTS model
- :param partitioner: Universe of Discourse partitioner
- :param train_data: data used to train the model
-    :param test_data: data used to test the model
- :param window_key: id of the sliding window
- :param transformation: data transformation
- :param indexer: seasonal indexer
- :return: a dictionary with the benchmark results
- """
- import time
- from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
- from pyFTS.partitioners import Grid, Entropy, FCM
- from pyFTS.benchmarks import Measures, naive, arima, quantreg
- from pyFTS.common import Transformations
-
- tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
- cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
- pwfts.ProbabilisticWeightedFTS]
-
- tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
-
- tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
-
- tmp3 = [Measures.get_point_statistics]
-
- tmp5 = [Transformations.Differential]
-
- indexer = kwargs.get('indexer', None)
-
- steps_ahead = kwargs.get('steps_ahead', 1)
- method = kwargs.get('method', None)
-
- if mfts.benchmark_only:
- _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
- else:
- pttr = str(partitioner.__module__).split('.')[-1]
- _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
- mfts.partitioner = partitioner
- mfts.append_transformation(partitioner.transformation)
-
- _key += str(steps_ahead)
- _key += str(method) if method is not None else ""
-
- _start = time.time()
- mfts.fit(train_data, **kwargs)
- _end = time.time()
- times = _end - _start
-
-
- _start = time.time()
- _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
- _end = time.time()
- times += _end - _start
-
- ret = {'key': _key, 'obj': mfts, 'rmse': _rmse, 'smape': _smape, 'u': _u, 'time': times, 'window': window_key,
- 'steps': steps_ahead, 'method': method}
-
- return ret
-
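run_point (and its run_interval / run_probabilistic counterparts) can also be called directly to benchmark a single model on one train/test split, outside the sliding-window loop. A minimal sketch, assuming an illustrative HighOrderFTS model and Grid partitioner:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.benchmarks import benchmarks as bchmk

data = [np.sin(k / 10) * 50 + 100 for k in range(500)]
train, test = data[:400], data[400:]

partitioner = Grid.GridPartitioner(data=train, npart=15)
model = hofts.HighOrderFTS(order=2)

# returns the benchmark dictionary described in the docstring above
job = bchmk.run_point(model, partitioner, train, test)
print(job['key'], job['rmse'], job['smape'], job['u'], job['time'])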
-
-[docs]def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
- """
- Run the interval forecasting benchmarks
-
- :param mfts: FTS model
- :param partitioner: Universe of Discourse partitioner
- :param train_data: data used to train the model
-    :param test_data: data used to test the model
- :param window_key: id of the sliding window
- :param transformation: data transformation
- :param indexer: seasonal indexer
- :return: a dictionary with the benchmark results
- """
- import time
- from pyFTS.models import hofts,ifts,pwfts
- from pyFTS.partitioners import Grid, Entropy, FCM
- from pyFTS.benchmarks import Measures, arima, quantreg
-
- tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
-
- tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
-
- tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
-
- tmp3 = [Measures.get_interval_statistics]
-
- steps_ahead = kwargs.get('steps_ahead', 1)
- method = kwargs.get('method', None)
-
- if mfts.benchmark_only:
- _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
- else:
- pttr = str(partitioner.__module__).split('.')[-1]
- _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
- mfts.partitioner = partitioner
- mfts.append_transformation(partitioner.transformation)
-
- _key += str(steps_ahead)
- _key += str(method) if method is not None else ""
-
- _start = time.time()
- mfts.fit(train_data, **kwargs)
- _end = time.time()
- times = _end - _start
-
- _start = time.time()
- #_sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25
- metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
- _end = time.time()
- times += _end - _start
-
- ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
- 'time': times,'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
- 'winkler05': metrics[7], 'winkler25': metrics[8],
- 'window': window_key,'steps': steps_ahead, 'method': method}
-
- return ret
-
-
-[docs]def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
- """
- Run the probabilistic forecasting benchmarks
-
- :param mfts: FTS model
- :param partitioner: Universe of Discourse partitioner
- :param train_data: data used to train the model
-    :param test_data: data used to test the model
- :param steps:
- :param resolution:
- :param window_key: id of the sliding window
- :param transformation: data transformation
- :param indexer: seasonal indexer
- :return: a dictionary with the benchmark results
- """
- import time
- import numpy as np
- from pyFTS.models import hofts, ifts, pwfts
- from pyFTS.models.ensemble import ensemble
- from pyFTS.partitioners import Grid, Entropy, FCM
- from pyFTS.benchmarks import Measures, arima, quantreg, knn
- from pyFTS.models.seasonal import SeasonalIndexer
-
- tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
- ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]
-
- tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
-
- tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
-
- indexer = kwargs.get('indexer', None)
-
- steps_ahead = kwargs.get('steps_ahead', 1)
- method = kwargs.get('method', None)
-
- if mfts.benchmark_only:
- _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
- else:
- pttr = str(partitioner.__module__).split('.')[-1]
- _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
- mfts.partitioner = partitioner
- mfts.append_transformation(partitioner.transformation)
-
- _key += str(steps_ahead)
- _key += str(method) if method is not None else ""
-
- if mfts.has_seasonality:
- mfts.indexer = indexer
-
- _start = time.time()
- mfts.fit(train_data, **kwargs)
- _end = time.time()
- times = _end - _start
-
- _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
- _t1 += times
-
- ret = {'key': _key, 'obj': mfts, 'CRPS': _crps1, 'time': _t1, 'brier': _brier, 'window': window_key,
- 'steps': steps_ahead, 'method': method}
-
- return ret
-
-
-[docs]def process_point_jobs(dataset, tag, job, conn):
- """
- Extract information from a dictionary with point benchmark results and save it on a database
-
- :param dataset: the benchmark dataset name
- :param tag: alias for the benchmark group being executed
- :param job: a dictionary with the benchmark results
- :param conn: a connection to a Sqlite database
- :return:
- """
-
- data = bUtil.process_common_data(dataset, tag, 'point',job)
-
- rmse = deepcopy(data)
- rmse.extend(["rmse", job["rmse"]])
- bUtil.insert_benchmark(rmse, conn)
- smape = deepcopy(data)
- smape.extend(["smape", job["smape"]])
- bUtil.insert_benchmark(smape, conn)
- u = deepcopy(data)
- u.extend(["u", job["u"]])
- bUtil.insert_benchmark(u, conn)
- time = deepcopy(data)
- time.extend(["time", job["time"]])
- bUtil.insert_benchmark(time, conn)
-
-
-[docs]def process_interval_jobs(dataset, tag, job, conn):
- """
- Extract information from an dictionary with interval benchmark results and save it on a database
-
- :param dataset: the benchmark dataset name
- :param tag: alias for the benchmark group being executed
- :param job: a dictionary with the benchmark results
- :param conn: a connection to a Sqlite database
- :return:
- """
-
- data = bUtil.process_common_data(dataset, tag, 'interval', job)
-
- sharpness = deepcopy(data)
- sharpness.extend(["sharpness", job["sharpness"]])
- bUtil.insert_benchmark(sharpness, conn)
- resolution = deepcopy(data)
- resolution.extend(["resolution", job["resolution"]])
- bUtil.insert_benchmark(resolution, conn)
- coverage = deepcopy(data)
- coverage.extend(["coverage", job["coverage"]])
- bUtil.insert_benchmark(coverage, conn)
- time = deepcopy(data)
- time.extend(["time", job["time"]])
- bUtil.insert_benchmark(time, conn)
- Q05 = deepcopy(data)
- Q05.extend(["Q05", job["Q05"]])
- bUtil.insert_benchmark(Q05, conn)
- Q25 = deepcopy(data)
- Q25.extend(["Q25", job["Q25"]])
- bUtil.insert_benchmark(Q25, conn)
- Q75 = deepcopy(data)
- Q75.extend(["Q75", job["Q75"]])
- bUtil.insert_benchmark(Q75, conn)
- Q95 = deepcopy(data)
- Q95.extend(["Q95", job["Q95"]])
- bUtil.insert_benchmark(Q95, conn)
- W05 = deepcopy(data)
- W05.extend(["winkler05", job["winkler05"]])
- bUtil.insert_benchmark(W05, conn)
- W25 = deepcopy(data)
- W25.extend(["winkler25", job["winkler25"]])
- bUtil.insert_benchmark(W25, conn)
-
-
-[docs]def process_probabilistic_jobs(dataset, tag, job, conn):
- """
- Extract information from an dictionary with probabilistic benchmark results and save it on a database
-
- :param dataset: the benchmark dataset name
- :param tag: alias for the benchmark group being executed
- :param job: a dictionary with the benchmark results
- :param conn: a connection to a Sqlite database
- :return:
- """
-
- data = bUtil.process_common_data(dataset, tag, 'density', job)
-
- crps = deepcopy(data)
- crps.extend(["crps",job["CRPS"]])
- bUtil.insert_benchmark(crps, conn)
- time = deepcopy(data)
- time.extend(["time", job["time"]])
- bUtil.insert_benchmark(time, conn)
- brier = deepcopy(data)
- brier.extend(["brier", job["brier"]])
- bUtil.insert_benchmark(brier, conn)
-
-
-[docs]def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
- """
- Run point benchmarks on given models and data and print the results
-
- :param data: test data
- :param models: a list of FTS models to benchmark
- :param externalmodels: a list with benchmark models (façades for other methods)
- :param externalforecasts:
- :param indexers:
- :return:
- """
- ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
- for count,model in enumerate(models,start=0):
- _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
- ret += model.shortname + " & "
- ret += str(model.order) + " & "
- ret += str(_rmse) + " & "
- ret += str(_smape)+ " & "
- ret += str(_u)
- #ret += str(round(Measures.TheilsInequality(np.array(data[fts.order:]), np.array(forecasts[:-1])), 4))
- ret += " \\\\ \n"
- if externalmodels is not None:
- l = len(externalmodels)
- for k in np.arange(0,l):
- ret += externalmodels[k] + " & "
- ret += " 1 & "
- ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & "
- ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2))+ " & "
- ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2))
- ret += " \\\\ \n"
- print(ret)
-
-
-[docs]def print_interval_statistics(original, models):
- """
- Run interval benchmarks on given models and data and print the results
-
-    :param original: test data
- :param models: a list of FTS models to benchmark
- :return:
- """
- ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
- for fts in models:
-        _sharp, _res, _cov, _q5, _q25, _q75, _q95, _w05, _w25 = Measures.get_interval_statistics(original, fts)
- ret += fts.shortname + " & "
- ret += str(fts.order) + " & "
- ret += str(_sharp) + " & "
- ret += str(_res) + " & "
- ret += str(_cov) + " &"
- ret += str(_q5) + " &"
- ret += str(_q25) + " &"
- ret += str(_q75) + " &"
- ret += str(_q95) + "\\\\ \n"
- print(ret)
-
-
-[docs]def print_distribution_statistics(original, models, steps, resolution):
- """
- Run probabilistic benchmarks on given models and data and print the results
-
-    :param original: test data
-    :param models: a list of FTS models to benchmark
-    :param steps:
-    :param resolution:
- :return:
- """
- ret = "Model & Order & Interval & Distribution \\\\ \n"
- for fts in models:
- _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(original, fts, steps, resolution)
- ret += fts.shortname + " & "
- ret += str(fts.order) + " & "
- ret += str(_crps1) + " & "
- ret += str(_crps2) + " \\\\ \n"
- print(ret)
-
-
-
-
-
-
-
-
-
-[docs]def plot_point(axis, points, order, label, color='red', ls='-', linewidth=1):
- mi = min(points) * 0.95
- ma = max(points) * 1.05
- for k in np.arange(0, order):
- points.insert(0, None)
- axis.plot(points, color=color, label=label, ls=ls,linewidth=linewidth)
- return [mi, ma]
-
-
-
-[docs]def plot_compared_series(original, models, colors, typeonlegend=False, save=False, file=None, tam=[20, 5],
- points=True, intervals=True, linewidth=1.5):
- """
- Plot the forecasts of several one step ahead models, by point or by interval
-
- :param original: Original time series data (list)
- :param models: List of models to compare
- :param colors: List of models colors
- :param typeonlegend: Add the type of forecast (point / interval) on legend
- :param save: Save the picture on file
- :param file: Filename to save the picture
- :param tam: Size of the picture
- :param points: True to plot the point forecasts, False otherwise
- :param intervals: True to plot the interval forecasts, False otherwise
- :param linewidth:
- :return:
- """
-
- fig = plt.figure(figsize=tam)
- ax = fig.add_subplot(111)
-
- mi = []
- ma = []
-
- legends = []
-
- ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
-
- for count, fts in enumerate(models, start=0):
- try:
- if fts.has_point_forecasting and points:
- forecasts = fts.forecast(original)
- if isinstance(forecasts, np.ndarray):
- forecasts = forecasts.tolist()
- mi.append(min(forecasts) * 0.95)
- ma.append(max(forecasts) * 1.05)
- for k in np.arange(0, fts.order):
- forecasts.insert(0, None)
- lbl = fts.shortname + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
- if typeonlegend: lbl += " (Point)"
- ax.plot(forecasts, color=colors[count], label=lbl, ls="-",linewidth=linewidth)
-
- if fts.has_interval_forecasting and intervals:
- forecasts = fts.forecast_interval(original)
- lbl = fts.shortname + " " + str(fts.order if fts.is_high_order and not fts.benchmark_only else "")
- if not points and intervals:
- ls = "-"
- else:
- ls = "--"
- tmpmi, tmpma = Util.plot_interval(ax, forecasts, fts.order, label=lbl, typeonlegend=typeonlegend,
- color=colors[count], ls=ls, linewidth=linewidth)
- mi.append(tmpmi)
- ma.append(tmpma)
- except ValueError as ex:
- print(fts.shortname)
-
- handles0, labels0 = ax.get_legend_handles_labels()
- lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
- legends.append(lgd)
-
- # ax.set_title(fts.name)
- ax.set_ylim([min(mi), max(ma)])
- ax.set_ylabel('F(T)')
- ax.set_xlabel('T')
- ax.set_xlim([0, len(original)])
-
- #Util.show_and_save_image(fig, file, save, lgd=legends)
-
-
-
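A short usage sketch of plot_compared_series above. The models, partitioner and colors are illustrative assumptions; any already fitted FTS models can be passed.

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models import chen, hofts
from pyFTS.benchmarks import benchmarks as bchmk

data = [np.sin(k / 10) * 50 + 100 for k in range(500)]
fs = Grid.GridPartitioner(data=data, npart=15)

model1 = chen.ConventionalFTS(partitioner=fs)
model1.fit(data)
model2 = hofts.HighOrderFTS(partitioner=fs, order=2)
model2.fit(data)

# plot the original series and the point forecasts of both models
bchmk.plot_compared_series(data, [model1, model2], ['blue', 'green'],
                           points=True, intervals=False, tam=[15, 5])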
-
-
-
-[docs]def plotCompared(original, forecasts, labels, title):
- fig = plt.figure(figsize=[13, 6])
- ax = fig.add_subplot(111)
- ax.plot(original, color='k', label="Original")
- for c in range(0, len(forecasts)):
- ax.plot(forecasts[c], label=labels[c])
- handles0, labels0 = ax.get_legend_handles_labels()
- ax.legend(handles0, labels0)
- ax.set_title(title)
- ax.set_ylabel('F(T)')
- ax.set_xlabel('T')
- ax.set_xlim([0, len(original)])
- ax.set_ylim([min(original), max(original)])
-
-
-[docs]def SelecaoSimples_MenorRMSE(original, parameters, modelo):
- ret = []
- errors = []
- forecasted_best = []
- print("Série Original")
- fig = plt.figure(figsize=[20, 12])
- fig.suptitle("Comparação de modelos ")
- ax0 = fig.add_axes([0, 0.5, 0.65, 0.45]) # left, bottom, width, height
- ax0.set_xlim([0, len(original)])
- ax0.set_ylim([min(original), max(original)])
- ax0.set_title('Série Temporal')
- ax0.set_ylabel('F(T)')
- ax0.set_xlabel('T')
- ax0.plot(original, label="Original")
- min_rmse = 100000.0
- best = None
- for p in parameters:
- sets = Grid.GridPartitioner(data=original, npart=p).sets
-        fts = modelo(str(p) + " partitions")
- fts.train(original, sets=sets)
- # print(original)
- forecasted = fts.forecast(original)
- forecasted.insert(0, original[0])
- # print(forecasted)
- ax0.plot(forecasted, label=fts.name)
- error = Measures.rmse(np.array(forecasted), np.array(original))
- print(p, error)
- errors.append(error)
- if error < min_rmse:
- min_rmse = error
- best = fts
- forecasted_best = forecasted
- handles0, labels0 = ax0.get_legend_handles_labels()
- ax0.legend(handles0, labels0)
- ax1 = fig.add_axes([0.7, 0.5, 0.3, 0.45]) # left, bottom, width, height
-    ax1.set_title('Comparison of Mean Squared Errors')
-    ax1.set_ylabel('RMSE')
-    ax1.set_xlabel('Number of partitions')
- ax1.set_xlim([min(parameters), max(parameters)])
- ax1.plot(parameters, errors)
- ret.append(best)
- ret.append(forecasted_best)
-    # Differential model
-    print("\nDifferenced series")
- difffts = Transformations.differential(original)
- errors = []
- forecastedd_best = []
- ax2 = fig.add_axes([0, 0, 0.65, 0.45]) # left, bottom, width, height
- ax2.set_xlim([0, len(difffts)])
- ax2.set_ylim([min(difffts), max(difffts)])
-    ax2.set_title('Time series')
- ax2.set_ylabel('F(T)')
- ax2.set_xlabel('T')
- ax2.plot(difffts, label="Original")
- min_rmse = 100000.0
- bestd = None
- for p in parameters:
- sets = Grid.GridPartitioner(data=difffts, npart=p)
-        fts = modelo(str(p) + " partitions")
- fts.train(difffts, sets=sets)
- forecasted = fts.forecast(difffts)
- forecasted.insert(0, difffts[0])
- ax2.plot(forecasted, label=fts.name)
- error = Measures.rmse(np.array(forecasted), np.array(difffts))
- print(p, error)
- errors.append(error)
- if error < min_rmse:
- min_rmse = error
- bestd = fts
- forecastedd_best = forecasted
- handles0, labels0 = ax2.get_legend_handles_labels()
- ax2.legend(handles0, labels0)
- ax3 = fig.add_axes([0.7, 0, 0.3, 0.45]) # left, bottom, width, height
-    ax3.set_title('Comparison of Mean Squared Errors')
-    ax3.set_ylabel('RMSE')
-    ax3.set_xlabel('Number of partitions')
- ax3.set_xlim([min(parameters), max(parameters)])
- ax3.plot(parameters, errors)
- ret.append(bestd)
- ret.append(forecastedd_best)
- return ret
-
-
-[docs]def compareModelsPlot(original, models_fo, models_ho):
- fig = plt.figure(figsize=[13, 6])
- fig.suptitle("Comparação de modelos ")
- ax0 = fig.add_axes([0, 0, 1, 1]) # left, bottom, width, height
- rows = []
- for model in models_fo:
- fts = model["model"]
- ax0.plot(model["forecasted"], label=model["name"])
- for model in models_ho:
- fts = model["model"]
- ax0.plot(model["forecasted"], label=model["name"])
- handles0, labels0 = ax0.get_legend_handles_labels()
- ax0.legend(handles0, labels0)
-
-
-[docs]def compareModelsTable(original, models_fo, models_ho):
- fig = plt.figure(figsize=[12, 4])
- fig.suptitle("Comparação de modelos ")
- columns = ['Modelo', 'Ordem', 'Partições', 'RMSE', 'MAPE (%)']
- rows = []
- for model in models_fo:
- fts = model["model"]
- error_r = Measures.rmse(model["forecasted"], original)
- error_m = round(Measures.mape(model["forecasted"], original) * 100, 2)
- rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
- for model in models_ho:
- fts = model["model"]
- error_r = Measures.rmse(model["forecasted"][fts.order:], original[fts.order:])
- error_m = round(Measures.mape(model["forecasted"][fts.order:], original[fts.order:]) * 100, 2)
- rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
- ax1 = fig.add_axes([0, 0, 1, 1]) # left, bottom, width, height
- ax1.set_xticks([])
- ax1.set_yticks([])
- ax1.table(cellText=rows,
- colLabels=columns,
- cellLoc='center',
- bbox=[0, 0, 1, 1])
- sup = "\\begin{tabular}{"
- header = ""
- body = ""
- footer = ""
-
- for c in columns:
- sup = sup + "|c"
- if len(header) > 0:
- header = header + " & "
- header = header + "\\textbf{" + c + "} "
- sup = sup + "|} \\hline\n"
- header = header + "\\\\ \\hline \n"
-
- for r in rows:
- lin = ""
- for c in r:
- if len(lin) > 0:
- lin = lin + " & "
- lin = lin + str(c)
-
- body = body + lin + "\\\\ \\hline \n"
-
- return sup + header + body + "\\end{tabular}"
-
-
-[docs]def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15],
- plotforecasts=False, elev=30, azim=144, intervals=False,parameters=None,
- partitioner=Grid.GridPartitioner,transformation=None,indexer=None):
- _3d = len(orders) > 1
- ret = []
- if _3d:
- errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
- else:
- errors = []
- forecasted_best = []
- fig = plt.figure(figsize=tam)
- # fig.suptitle("Comparação de modelos ")
- if plotforecasts:
- ax0 = fig.add_axes([0, 0.4, 0.9, 0.5]) # left, bottom, width, height
- ax0.set_xlim([0, len(train)])
- ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
- ax0.set_title('Forecasts')
- ax0.set_ylabel('F(T)')
- ax0.set_xlabel('T')
- min_rmse = 1000000.0
- best = None
-
- for pc, p in enumerate(partitions, start=0):
-
- sets = partitioner(data=train, npart=p, transformation=transformation).sets
- for oc, o in enumerate(orders, start=0):
- fts = model("q = " + str(p) + " n = " + str(o))
- fts.append_transformation(transformation)
- fts.train(train, sets=sets, order=o, parameters=parameters)
- if not intervals:
- forecasted = fts.forecast(test)
- if not fts.has_seasonality:
- error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1]))
- else:
- error = Measures.rmse(np.array(test[o:]), np.array(forecasted))
- for kk in range(o):
- forecasted.insert(0, None)
- if plotforecasts: ax0.plot(forecasted, label=fts.name)
- else:
- forecasted = fts.forecast_interval(test)
- error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1]))
- if _3d:
- errors[oc, pc] = error
- else:
- errors.append( error )
- if error < min_rmse:
- min_rmse = error
- best = fts
- forecasted_best = forecasted
-
- # print(min_rmse)
- if plotforecasts:
- # handles0, labels0 = ax0.get_legend_handles_labels()
- # ax0.legend(handles0, labels0)
- ax0.plot(test, label="Original", linewidth=3.0, color="black")
- if _3d: ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
- if _3d and not plotforecasts:
- ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
- ax1.set_title('Error Surface')
- ax1.set_ylabel('Model order')
- ax1.set_xlabel('Number of partitions')
- ax1.set_zlabel('RMSE')
- X, Y = np.meshgrid(partitions, orders)
- surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
- else:
- ax1 = fig.add_axes([0, 1, 0.9, 0.9])
- ax1.set_title('Error Curve')
- ax1.set_xlabel('Number of partitions')
- ax1.set_ylabel('RMSE')
- ax1.plot(partitions, errors)
- ret.append(best)
- ret.append(forecasted_best)
- ret.append(min_rmse)
-
- # plt.tight_layout()
-
- cUtil.show_and_save_image(fig, file, save)
-
- return ret
-
-
-
-[docs]def pftsExploreOrderAndPartitions(data,save=False, file=None):
- fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
- data_fs1 = Grid.GridPartitioner(data=data, npart=10).sets
- mi = []
- ma = []
-
- axes[0].set_title('Point Forecasts by Order')
- axes[2].set_title('Interval Forecasts by Order')
-
- for order in np.arange(1, 6):
- fts = pwfts.ProbabilisticWeightedFTS("")
- fts.shortname = "n = " + str(order)
- fts.train(data, sets=data_fs1.sets, order=order)
- point_forecasts = fts.forecast(data)
- interval_forecasts = fts.forecast_interval(data)
- lower = [kk[0] for kk in interval_forecasts]
- upper = [kk[1] for kk in interval_forecasts]
- mi.append(min(lower) * 0.95)
- ma.append(max(upper) * 1.05)
- for k in np.arange(0, order):
- point_forecasts.insert(0, None)
- lower.insert(0, None)
- upper.insert(0, None)
- axes[0].plot(point_forecasts, label=fts.shortname)
- axes[2].plot(lower, label=fts.shortname)
- axes[2].plot(upper)
-
- axes[1].set_title('Point Forecasts by Number of Partitions')
- axes[3].set_title('Interval Forecasts by Number of Partitions')
-
- for partitions in np.arange(5, 11):
- data_fs = Grid.GridPartitioner(data=data, npart=partitions).sets
- fts = pwfts.ProbabilisticWeightedFTS("")
- fts.shortname = "q = " + str(partitions)
- fts.train(data, sets=data_fs.sets, order=1)
- point_forecasts = fts.forecast(data)
- interval_forecasts = fts.forecast_interval(data)
- lower = [kk[0] for kk in interval_forecasts]
- upper = [kk[1] for kk in interval_forecasts]
- mi.append(min(lower) * 0.95)
- ma.append(max(upper) * 1.05)
- point_forecasts.insert(0, None)
- lower.insert(0, None)
- upper.insert(0, None)
- axes[1].plot(point_forecasts, label=fts.shortname)
- axes[3].plot(lower, label=fts.shortname)
- axes[3].plot(upper)
-
- for ax in axes:
- ax.set_ylabel('F(T)')
- ax.set_xlabel('T')
- ax.plot(data, label="Original", color="black", linewidth=1.5)
- handles, labels = ax.get_legend_handles_labels()
- ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
- ax.set_ylim([min(mi), max(ma)])
- ax.set_xlim([0, len(data)])
-
- plt.tight_layout()
-
- cUtil.show_and_save_image(fig, file, save)
-
-
Source code for pyFTS.benchmarks.knn
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-import numpy as np
-from statsmodels.tsa.tsatools import lagmat
-from pyFTS.common import fts
-from pyFTS.probabilistic import ProbabilityDistribution
-
-
-[docs]class KNearestNeighbors(fts.FTS):
- """
- K-Nearest Neighbors
- """
- def __init__(self, **kwargs):
- super(KNearestNeighbors, self).__init__(**kwargs)
- self.name = "kNN"
- self.shortname = "kNN"
- self.detail = "K-Nearest Neighbors"
- self.is_high_order = True
- self.has_point_forecasting = True
- self.has_interval_forecasting = True
- self.has_probability_forecasting = True
- self.benchmark_only = True
- self.min_order = 1
- self.alpha = kwargs.get("alpha", 0.05)
- self.lag = None
- self.k = kwargs.get("k", 30)
- self.uod = None
-
-
-
-
-[docs] def knn(self, sample):
-
- if self.order == 1:
- dist = np.apply_along_axis(lambda x: (x - sample) ** 2, 0, self.data)
- ix = np.argsort(dist) + 1
- else:
- dist = []
- for k in np.arange(self.order, len(self.data)):
- dist.append(sum([ (self.data[k - kk] - sample[kk])**2 for kk in range(self.order)]))
- ix = np.argsort(np.array(dist)) + self.order + 1
-
- ix2 = np.clip(ix[:self.k], 0, len(self.data)-1)
- return self.data[ix2]
-
-[docs] def forecast_distribution(self, data, **kwargs):
- ret = []
-
- smooth = kwargs.get("smooth", "KDE")
- alpha = kwargs.get("alpha", None)
-
- uod = self.get_UoD()
-
- for k in np.arange(self.order, len(data)):
-
- sample = data[k-self.order : k]
-
- forecasts = self.knn(sample)
-
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
- name="", **kwargs)
- ret.append(dist)
-
- return ret
-
-
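A minimal sketch of the k-NN benchmark model above. The order and k values are illustrative; fit() is inherited from fts.FTS and is assumed to store the training sample consumed by knn().

import numpy as np
from pyFTS.benchmarks import knn

data = [np.sin(k / 10) * 50 + 100 for k in range(500)]
train, test = data[:400], data[400:]

model = knn.KNearestNeighbors(order=2, k=30)
model.fit(train)

# one ProbabilityDistribution per forecasted point, built from the k nearest past patterns
distributions = model.forecast_distribution(test)
print(distributions[0])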
-
Source code for pyFTS.benchmarks.naive
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-from pyFTS.common import fts
-
-
-[docs]class Naive(fts.FTS):
- """Naïve Forecasting method"""
- def __init__(self, **kwargs):
- super(Naive, self).__init__(order=1, name="Naive",**kwargs)
- self.name = "Naïve Model"
- self.detail = "Naïve Model"
- self.benchmark_only = True
- self.is_high_order = False
-
-
-
-
Source code for pyFTS.benchmarks.quantreg
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-import numpy as np
-import pandas as pd
-from statsmodels.regression.quantile_regression import QuantReg
-from statsmodels.tsa.tsatools import lagmat
-from pyFTS.common import SortedCollection, fts
-from pyFTS.probabilistic import ProbabilityDistribution
-
-
-[docs]class QuantileRegression(fts.FTS):
- """Façade for statsmodels.regression.quantile_regression"""
- def __init__(self, **kwargs):
- super(QuantileRegression, self).__init__(**kwargs)
- self.name = "QR"
- self.detail = "Quantile Regression"
- self.is_high_order = True
- self.has_point_forecasting = True
- self.has_interval_forecasting = True
- self.has_probability_forecasting = True
- self.benchmark_only = True
- self.min_order = 1
- self.alpha = kwargs.get("alpha", 0.05)
- self.dist = kwargs.get("dist", False)
- self.upper_qt = None
- self.mean_qt = None
- self.lower_qt = None
- self.dist_qt = None
-
-[docs] def train(self, data, **kwargs):
- if self.indexer is not None and isinstance(data, pd.DataFrame):
- data = self.indexer.get_data(data)
-
- lagdata, ndata = lagmat(data, maxlag=self.order, trim="both", original='sep')
-
- mqt = QuantReg(ndata, lagdata).fit(0.5)
- if self.alpha is not None:
- uqt = QuantReg(ndata, lagdata).fit(1 - self.alpha)
- lqt = QuantReg(ndata, lagdata).fit(self.alpha)
-
- self.mean_qt = [k for k in mqt.params]
- if self.alpha is not None:
- self.upper_qt = [k for k in uqt.params]
- self.lower_qt = [k for k in lqt.params]
-
- if self.dist:
- self.dist_qt = []
- for alpha in np.arange(0.05,0.5,0.05):
- lqt = QuantReg(ndata, lagdata).fit(alpha)
- uqt = QuantReg(ndata, lagdata).fit(1 - alpha)
- lo_qt = [k for k in lqt.params]
- up_qt = [k for k in uqt.params]
- self.dist_qt.append([lo_qt, up_qt])
-
- self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha)
-
-[docs] def linearmodel(self,data,params):
- #return params[0] + sum([ data[k] * params[k+1] for k in np.arange(0, self.order) ])
- return sum([data[k] * params[k] for k in np.arange(0, self.order)])
-
-[docs] def point_to_interval(self, data, lo_params, up_params):
- lo = self.linearmodel(data, lo_params)
- up = self.linearmodel(data, up_params)
- return [lo, up]
-
-[docs] def interval_to_interval(self, data, lo_params, up_params):
- lo = self.linearmodel([k[0] for k in data], lo_params)
- up = self.linearmodel([k[1] for k in data], up_params)
- return [lo, up]
-
-[docs] def forecast(self, ndata, **kwargs):
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(self.order, l+1): #+1 to forecast one step ahead given all available lags
- sample = ndata[k - self.order : k]
-
- ret.append(self.linearmodel(sample, self.mean_qt))
-
- return ret
-
-[docs] def forecast_interval(self, ndata, **kwargs):
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(self.order , l):
- sample = ndata[k - self.order: k]
- ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))
-
- return ret
-
-[docs] def forecast_ahead_interval(self, ndata, steps, **kwargs):
-
- smoothing = kwargs.get("smoothing", 0.9)
-
- l = len(ndata)
-
- ret = []
-
- nmeans = self.forecast_ahead(ndata, steps, **kwargs)
-
- for k in np.arange(0, self.order):
- nmeans.insert(k,ndata[-(k+1)])
-
- for k in np.arange(self.order, steps+self.order):
- intl = self.point_to_interval(nmeans[k - self.order: k], self.lower_qt, self.upper_qt)
-
- ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])
-
- return ret[-steps:]
-
-[docs] def forecast_distribution(self, ndata, **kwargs):
-
- ret = []
-
- l = len(ndata)
-
- for k in np.arange(self.order, l + 1):
- dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
- uod=[self.original_min, self.original_max])
- intervals = []
- for qt in self.dist_qt:
- sample = ndata[k - self.order: k]
- intl = self.point_to_interval(sample, qt[0], qt[1])
- intervals.append(intl)
-
- dist.append_interval(intervals)
-
- ret.append(dist)
-
- return ret
-
-[docs] def forecast_ahead_distribution(self, ndata, steps, **kwargs):
-
- ret = []
-
- for k in np.arange(self.order, steps + self.order):
- dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
- uod=[self.original_min, self.original_max])
- intervals = [[k, k] for k in ndata[-self.order:]]
- for qt in self.dist_qt:
- intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
- intervals.append(intl)
- dist.append_interval(intervals)
-
- ret.append(dist)
-
- return ret
-
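A minimal usage sketch for the QuantileRegression façade listed above, following the generic fit/predict interface of pyFTS.common.fts.FTS (shown later in this file). Here train and test are placeholder variables holding a univariate time series as a list or 1-d numpy array.

from pyFTS.benchmarks import quantreg

model = quantreg.QuantileRegression(order=2, alpha=0.05)
model.fit(train)                                   # fits the median and the alpha/(1-alpha) quantile regressions
points = model.predict(test)                       # one step ahead point forecasts
intervals = model.predict(test, type='interval')   # one step ahead [lower, upper] prediction intervals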
Source code for pyFTS.common.Composite
-"""
-Composite Fuzzy Sets
-"""
-
-import numpy as np
-from pyFTS import *
-from pyFTS.common import Membership, FuzzySet
-
-
-[docs]class FuzzySet(FuzzySet.FuzzySet):
- """
- Composite Fuzzy Set
- """
- def __init__(self, name, superset=False, **kwargs):
- """
- Create an empty composite fuzzy set
- :param name: fuzzy set name
- """
- if 'type' in kwargs:
- kwargs.pop('type')
- super(FuzzySet, self).__init__(name, None, None, None, type='composite', **kwargs)
- self.superset = superset
- if self.superset:
- self.sets = []
- else:
- self.mf = []
- self.parameters = []
-
- self.lower = None
- self.upper = None
- self.centroid = None
-
-
-[docs] def membership(self, x):
- """
- Calculate the membership value of a given input
-
- :param x: input value
- :return: membership value of x at this fuzzy set
- """
- if self.superset:
- return max([s.membership(x) for s in self.sets])
- else:
- return min([self.mf[ct](self.transform(x), self.parameters[ct]) for ct in np.arange(0, len(self.mf))])
-
-
-
-[docs] def append(self, mf, parameters):
- """
- Adds a new function to composition
-
- :param mf:
- :param parameters:
- :return:
- """
- self.mf.append(mf)
- self.parameters.append(parameters)
-
-[docs] def append_set(self, set):
- """
-        Adds a new fuzzy set to the composition
-
-        :param set: the FuzzySet object to be added as a component of this superset
-        :return:
- """
- self.sets.append(set)
-
- if self.lower is None or self.lower > set.lower:
- self.lower = set.lower
-
- if self.upper is None or self.upper < set.upper:
- self.upper = set.upper
-
- if self.centroid is None or self.centroid < set.centroid:
- self.centroid = set.centroid
-
-
- def __str__(self):
- tmp = str([str(k) for k in self.sets])
- return "{}: {}".format(self.name, tmp)
-
-
-
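A minimal sketch of how the composite fuzzy set above can be assembled from elementary membership functions (pyFTS.common.Membership, listed further below); the numeric parameters are illustrative only.

from pyFTS.common import Membership, Composite

fs = Composite.FuzzySet('A', superset=False)
fs.append(Membership.trimf, [0, 5, 10])    # first component
fs.append(Membership.trimf, [3, 7, 12])    # second component
print(fs.membership(6))                    # minimum of the component memberships: 0.75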
Source code for pyFTS.common.FLR
-"""
-This module implements functions for Fuzzy Logical Relationship generation
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet
-
-
-[docs]class FLR(object):
- """
- Fuzzy Logical Relationship
-
-    Represents a temporal transition from the fuzzy set LHS at time t to the fuzzy set RHS at time t+1.
- """
- def __init__(self, LHS, RHS):
- """
- Creates a Fuzzy Logical Relationship
- """
- self.LHS = LHS
- """Left Hand Side fuzzy set"""
- self.RHS = RHS
- """Right Hand Side fuzzy set"""
-
- def __str__(self):
- return str(self.LHS) + " -> " + str(self.RHS)
-
-
-[docs]class IndexedFLR(FLR):
- """Season Indexed Fuzzy Logical Relationship"""
- def __init__(self, index, LHS, RHS):
- """
- Create a Season Indexed Fuzzy Logical Relationship
- """
- super(IndexedFLR, self).__init__(LHS, RHS)
- self.index = index
- """seasonal index"""
-
- def __str__(self):
- return str(self.index) + ": "+ str(self.LHS) + " -> " + str(self.RHS)
-
-
-[docs]def generate_high_order_recurrent_flr(fuzzyData):
- """
-    Create an ordered list of FLRs from a list of fuzzy sets, preserving recurrences
-
- :param fuzzyData: ordered list of fuzzy sets
- :return: ordered list of FLR
- """
- flrs = []
- for i in np.arange(1,len(fuzzyData)):
- lhs = fuzzyData[i - 1]
- rhs = fuzzyData[i]
- if isinstance(lhs, list) and isinstance(rhs, list):
- for l in lhs:
- for r in rhs:
- tmp = FLR(l, r)
- flrs.append(tmp)
- else:
- tmp = FLR(lhs,rhs)
- flrs.append(tmp)
- return flrs
-
-
-[docs]def generate_recurrent_flrs(fuzzyData):
- """
-    Create an ordered list of FLRs from a list of fuzzy sets, preserving recurrences
-
- :param fuzzyData: ordered list of fuzzy sets
- :return: ordered list of FLR
- """
- flrs = []
- for i in np.arange(1,len(fuzzyData)):
- lhs = [fuzzyData[i - 1]]
- rhs = [fuzzyData[i]]
- for l in np.array(lhs).flatten():
- for r in np.array(rhs).flatten():
- tmp = FLR(l, r)
- flrs.append(tmp)
- return flrs
-
-
-[docs]def generate_non_recurrent_flrs(fuzzyData):
- """
-    Create an ordered list of FLRs from a list of fuzzy sets, removing recurrences (duplicated transitions)
-
- :param fuzzyData: ordered list of fuzzy sets
- :return: ordered list of FLR
- """
- flrs = generate_recurrent_flrs(fuzzyData)
- tmp = {}
- for flr in flrs: tmp[str(flr)] = flr
- ret = [value for key, value in tmp.items()]
- return ret
-
-
-[docs]def generate_indexed_flrs(sets, indexer, data, transformation=None, alpha_cut=0.0):
- """
-    Create a season-indexed, ordered list of FLRs from a list of fuzzy sets, preserving recurrences
-
- :param sets: fuzzy sets
- :param indexer: seasonality indexer
- :param data: original data
- :return: ordered list of FLR
- """
- flrs = []
- index = indexer.get_season_of_data(data)
- ndata = indexer.get_data(data)
- if transformation is not None:
- ndata = transformation.apply(ndata)
- for k in np.arange(1,len(ndata)):
- lhs = FuzzySet.fuzzyfy_series([ndata[k - 1]], sets, method='fuzzy',alpha_cut=alpha_cut)
- rhs = FuzzySet.fuzzyfy_series([ndata[k]], sets, method='fuzzy',alpha_cut=alpha_cut)
- season = index[k]
- for _l in np.array(lhs).flatten():
- for _r in np.array(rhs).flatten():
- flr = IndexedFLR(season,_l,_r)
- flrs.append(flr)
- return flrs
-
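A short sketch of the FLR generators above applied to an already fuzzified series; the fuzzy set names are illustrative.

from pyFTS.common import FLR

fuzzified = ['A1', 'A2', 'A2', 'A3', 'A2', 'A3']

flrs = FLR.generate_recurrent_flrs(fuzzified)
print([str(flr) for flr in flrs])
# ['A1 -> A2', 'A2 -> A2', 'A2 -> A3', 'A3 -> A2', 'A2 -> A3']

unique = FLR.generate_non_recurrent_flrs(fuzzified)   # same transitions with the duplicated 'A2 -> A3' removed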
Source code for pyFTS.common.FuzzySet
-import numpy as np
-from pyFTS import *
-from pyFTS.common import Membership
-
-
-[docs]class FuzzySet(object):
- """
- Fuzzy Set
- """
-
- def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs):
- """
- Create a Fuzzy Set
- """
- self.name = name
- """The fuzzy set name"""
- self.mf = mf
- """The membership function"""
- self.parameters = parameters
- """The parameters of the membership function"""
- self.centroid = centroid
- """The fuzzy set center of mass (or midpoint)"""
- self.alpha = alpha
- """The alpha cut value"""
- self.type = kwargs.get('type', 'common')
- """The fuzzy set type (common, composite, nonstationary, etc)"""
- self.variable = kwargs.get('variable', None)
-        """In multivariate time series, indicates to which variable this fuzzy set belongs"""
- self.Z = None
- """Partition function in respect to the membership function"""
-
- if parameters is not None:
- if self.mf == Membership.gaussmf:
- self.lower = parameters[0] - parameters[1] * 3
- self.upper = parameters[0] + parameters[1] * 3
- elif self.mf == Membership.sigmf:
- k = (parameters[1] / (2 * parameters[0]))
- self.lower = parameters[1] - k
- self.upper = parameters[1] + k
- else:
- self.lower = min(parameters)
- self.upper = max(parameters)
-
- self.metadata = {}
-
-[docs] def transform(self, x):
- """
-        Preprocess the data point for non-native types
-
- :param x:
- :return: return a native type value for the structured type
- """
-
- return x
-
-[docs] def membership(self, x):
- """
- Calculate the membership value of a given input
-
- :param x: input value
- :return: membership value of x at this fuzzy set
- """
- return self.mf(self.transform(x), self.parameters) * self.alpha
-
-[docs] def partition_function(self, uod=None, nbins=100):
- """
- Calculate the partition function over the membership function.
-
- :param uod:
- :param nbins:
- :return:
- """
- if self.Z is None and uod is not None:
- self.Z = 0.0
- for k in np.linspace(uod[0], uod[1], nbins):
- self.Z += self.membership(k)
-
- return self.Z
-
- def __str__(self):
- return self.name + ": " + str(self.mf.__name__) + "(" + str(self.parameters) + ")"
-
-
-def __binary_search(x, fuzzy_sets, ordered_sets):
- """
-    Search for the fuzzy sets eligible to fuzzyfy x
-
- :param x: input value to be fuzzyfied
- :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
- :param ordered_sets: a list with the fuzzy sets names ordered by their centroids.
- :return: A list with the best fuzzy sets that may contain x
- """
- max_len = len(fuzzy_sets) - 1
- first = 0
- last = max_len
-
- while first <= last:
- midpoint = (first + last) // 2
-
- fs = ordered_sets[midpoint]
- fs1 = ordered_sets[midpoint - 1] if midpoint > 0 else ordered_sets[0]
- fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len]
-
- if fuzzy_sets[fs1].centroid <= fuzzy_sets[fs].transform(x) <= fuzzy_sets[fs2].centroid:
- return (midpoint - 1, midpoint, midpoint + 1)
- elif midpoint <= 1:
- return [0]
- elif midpoint >= max_len:
- return [max_len]
- else:
- if fuzzy_sets[fs].transform(x) < fuzzy_sets[fs].centroid:
- last = midpoint - 1
- else:
- first = midpoint + 1
-
-
-[docs]def fuzzyfy(data, partitioner, **kwargs):
- """
- A general method for fuzzyfication.
-
- :param data: input value to be fuzzyfied
- :param partitioner: a trained pyFTS.partitioners.Partitioner object
- :param kwargs: dict, optional arguments
- :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
- :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
- :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
- values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
-    :return: a list with the fuzzyfied values, depending on the mode
-
- """
- alpha_cut = kwargs.get('alpha_cut', 0.)
- mode = kwargs.get('mode', 'sets')
- method = kwargs.get('method', 'fuzzy')
- if isinstance(data, (list, np.ndarray)):
- if mode == 'vector':
- return fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
- elif mode == 'both':
- mvs = fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets)
- fs = []
- for mv in mvs:
- fsets = [(partitioner.ordered_sets[ix], mv[ix])
- for ix in np.arange(len(mv))
- if mv[ix] >= alpha_cut]
- fs.append(fsets)
- return fs
- else:
- return fuzzyfy_series(data, partitioner.sets, method, alpha_cut, partitioner.ordered_sets)
- else:
- if mode == 'vector':
- return fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets)
- elif mode == 'both':
- mv = fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets)
- fsets = [(partitioner.ordered_sets[ix], mv[ix])
- for ix in np.arange(len(mv))
- if mv[ix] >= alpha_cut]
- return fsets
- else:
- return get_fuzzysets(data, partitioner.sets, partitioner.ordered_sets, alpha_cut)
-
-
-[docs]def set_ordered(fuzzy_sets):
- """
- Order a fuzzy set list by their centroids
-
- :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
- :return: a list with the fuzzy sets names ordered by their centroids.
- """
- if len(fuzzy_sets) > 0:
- tmp1 = [fuzzy_sets[k] for k in fuzzy_sets.keys()]
- return [k.name for k in sorted(tmp1, key=lambda x: x.centroid)]
-
-
-[docs]def fuzzyfy_instance(inst, fuzzy_sets, ordered_sets=None):
- """
- Calculate the membership values for a data point given fuzzy sets
-
- :param inst: data point
- :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
- :param ordered_sets: a list with the fuzzy sets names ordered by their centroids.
- :return: array of membership values
- """
-
- if ordered_sets is None:
- ordered_sets = set_ordered(fuzzy_sets)
-
- mv = np.zeros(len(fuzzy_sets))
-
- for ix in __binary_search(inst, fuzzy_sets, ordered_sets):
- mv[ix] = fuzzy_sets[ordered_sets[ix]].membership(inst)
-
- return mv
-
-
-[docs]def fuzzyfy_instances(data, fuzzy_sets, ordered_sets=None):
- """
-    Calculate the membership values for a list of data points given fuzzy sets
-
-    :param data: list of data points
-    :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
-    :param ordered_sets: a list with the fuzzy sets names ordered by their centroids.
-    :return: a list with one array of membership values for each data point
- """
- ret = []
- if ordered_sets is None:
- ordered_sets = set_ordered(fuzzy_sets)
- for inst in data:
- mv = fuzzyfy_instance(inst, fuzzy_sets, ordered_sets)
- ret.append(mv)
- return ret
-
-
-[docs]def get_fuzzysets(inst, fuzzy_sets, ordered_sets=None, alpha_cut=0.0):
- """
-    Return the fuzzy sets whose membership value for inst is greater than the alpha_cut
-
- :param inst: data point
- :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
- :param ordered_sets: a list with the fuzzy sets names ordered by their centroids.
- :param alpha_cut: Minimal membership to be considered on fuzzyfication process
-    :return: a list with the names of the eligible fuzzy sets
- """
-
- if ordered_sets is None:
- ordered_sets = set_ordered(fuzzy_sets)
-
- try:
- fs = [ordered_sets[ix]
- for ix in __binary_search(inst, fuzzy_sets, ordered_sets)
- if fuzzy_sets[ordered_sets[ix]].membership(inst) > alpha_cut]
- return fs
- except Exception as ex:
- raise ex
-
-
-[docs]def get_maximum_membership_fuzzyset(inst, fuzzy_sets, ordered_sets=None):
- """
- Fuzzify a data point, returning the fuzzy set with maximum membership value
-
- :param inst: data point
- :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object.
- :param ordered_sets: a list with the fuzzy sets names ordered by their centroids.
- :return: fuzzy set with maximum membership
- """
- if ordered_sets is None:
- ordered_sets = set_ordered(fuzzy_sets)
- mv = np.array([fuzzy_sets[key].membership(inst) for key in ordered_sets])
- key = ordered_sets[np.argwhere(mv == max(mv))[0, 0]]
- return fuzzy_sets[key]
-
-
-[docs]def get_maximum_membership_fuzzyset_index(inst, fuzzy_sets):
- """
- Fuzzify a data point, returning the fuzzy set with maximum membership value
-
- :param inst: data point
- :param fuzzy_sets: dict of fuzzy sets
-    :return: the index of the fuzzy set with maximum membership
- """
- mv = fuzzyfy_instance(inst, fuzzy_sets)
- return np.argwhere(mv == max(mv))[0, 0]
-
-
-[docs]def fuzzyfy_series_old(data, fuzzy_sets, method='maximum'):
- fts = []
- for item in data:
- fts.append(get_maximum_membership_fuzzyset(item, fuzzy_sets).name)
- return fts
-
-
-[docs]def fuzzyfy_series(data, fuzzy_sets, method='maximum', alpha_cut=0.0, ordered_sets=None):
- fts = []
- if ordered_sets is None:
- ordered_sets = set_ordered(fuzzy_sets)
- for t, i in enumerate(data):
- mv = fuzzyfy_instance(i, fuzzy_sets, ordered_sets)
- if len(mv) == 0:
- sets = check_bounds(i, fuzzy_sets.items(), ordered_sets)
- else:
- if method == 'fuzzy':
- ix = np.ravel(np.argwhere(mv > alpha_cut))
- sets = [fuzzy_sets[ordered_sets[i]].name for i in ix]
- elif method == 'maximum':
- mx = max(mv)
- ix = np.ravel(np.argwhere(mv == mx))
- sets = fuzzy_sets[ordered_sets[ix[0]]].name
- fts.append(sets)
- return fts
-
-
-[docs]def grant_bounds(data, fuzzy_sets, ordered_sets):
- if data < fuzzy_sets[ordered_sets[0]].lower:
- return fuzzy_sets[ordered_sets[0]].lower
- elif data > fuzzy_sets[ordered_sets[-1]].upper:
- return fuzzy_sets[ordered_sets[-1]].upper
- else:
- return data
-
-
-[docs]def check_bounds(data, fuzzy_sets, ordered_sets):
- if data < fuzzy_sets[ordered_sets[0]].lower:
- return fuzzy_sets[ordered_sets[0]]
- elif data > fuzzy_sets[ordered_sets[-1]].upper:
- return fuzzy_sets[ordered_sets[-1]]
-
-
-[docs]def check_bounds_index(data, fuzzy_sets, ordered_sets):
- if data < fuzzy_sets[ordered_sets[0]].get_lower():
- return 0
- elif data > fuzzy_sets[ordered_sets[-1]].get_upper():
- return len(fuzzy_sets) - 1
-
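A minimal sketch of the fuzzification helpers above, building three triangular sets by hand (in practice a pyFTS.partitioners.Partitioner builds them); the universe of discourse and the set names are illustrative.

from pyFTS.common import FuzzySet, Membership

sets = {
    'A0': FuzzySet.FuzzySet('A0', Membership.trimf, [0, 0, 5], 0),
    'A1': FuzzySet.FuzzySet('A1', Membership.trimf, [0, 5, 10], 5),
    'A2': FuzzySet.FuzzySet('A2', Membership.trimf, [5, 10, 10], 10),
}

print(FuzzySet.fuzzyfy_instance(7, sets))                # memberships of A0, A1, A2: [0.0, 0.6, 0.4]
print(FuzzySet.get_fuzzysets(7, sets, alpha_cut=0.1))    # names of the eligible sets: ['A1', 'A2']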
Source code for pyFTS.common.Membership
-"""
-Membership functions for Fuzzy Sets
-"""
-
-import numpy as np
-import math
-from pyFTS import *
-
-
-[docs]def trimf(x, parameters):
- """
- Triangular fuzzy membership function
-
- :param x: data point
- :param parameters: a list with 3 real values
- :return: the membership value of x given the parameters
- """
- xx = np.round(x, 3)
- if xx < parameters[0]:
- return 0
- elif parameters[0] <= xx < parameters[1]:
- return (x - parameters[0]) / (parameters[1] - parameters[0])
- elif parameters[1] <= xx <= parameters[2]:
- return (parameters[2] - xx) / (parameters[2] - parameters[1])
- else:
- return 0
-
-
-[docs]def trapmf(x, parameters):
- """
- Trapezoidal fuzzy membership function
-
- :param x: data point
- :param parameters: a list with 4 real values
- :return: the membership value of x given the parameters
- """
- if x < parameters[0]:
- return 0
- elif parameters[0] <= x < parameters[1]:
- return (x - parameters[0]) / (parameters[1] - parameters[0])
- elif parameters[1] <= x <= parameters[2]:
- return 1
- elif parameters[2] <= x <= parameters[3]:
- return (parameters[3] - x) / (parameters[3] - parameters[2])
- else:
- return 0
-
-
-[docs]def gaussmf(x, parameters):
- """
- Gaussian fuzzy membership function
-
- :param x: data point
-    :param parameters: a list with 2 real values (mean and standard deviation)
- :return: the membership value of x given the parameters
- """
- return math.exp((-(x - parameters[0])**2)/(2 * parameters[1]**2))
-
-
-[docs]def bellmf(x, parameters):
- """
- Bell shaped membership function
-
-    :param x: data point
-    :param parameters: a list with 3 real values (width, slope and center)
-    :return: the membership value of x given the parameters
- """
- return 1 / (1 + abs((x - parameters[2]) / parameters[0]) ** (2 * parameters[1]))
-
-
-[docs]def sigmf(x, parameters):
- """
- Sigmoid / Logistic membership function
-
-    :param x: data point
-    :param parameters: a list with 2 real values (smoothness and midpoint)
-    :return: the membership value of x given the parameters
- """
- return 1 / (1 + math.exp(-parameters[0] * (x - parameters[1])))
-
-
-[docs]def singleton(x, parameters):
- """
- Singleton membership function, a single value fuzzy function
-
-    :param x: data point
-    :param parameters: a list with one real value
-    :return: 1 if x equals parameters[0], 0 otherwise
- """
- return 1 if x == parameters[0] else 0
-
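A few quick evaluations of the membership functions above; the values in the comments are rounded.

from pyFTS.common import Membership

print(Membership.trimf(4.0, [0, 5, 10]))       # 0.8
print(Membership.trapmf(6.0, [0, 2, 8, 10]))   # 1
print(Membership.gaussmf(4.0, [5, 2]))         # exp(-1/8) ~ 0.8825
print(Membership.sigmf(5.0, [1, 5]))           # 0.5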
Source code for pyFTS.common.SortedCollection
-from bisect import bisect_left, bisect_right
-
-#
-# Original Source Code: https://code.activestate.com/recipes/577197-sortedcollection/
-# Author: RAYMOND HETTINGER
-
-
-[docs]class SortedCollection(object):
- '''Sequence sorted by a key function.
-
- SortedCollection() is much easier to work with than using bisect() directly.
-    It supports key functions like those used in sorted(), min(), and max().
- The result of the key function call is saved so that keys can be searched
- efficiently.
-
- Instead of returning an insertion-point which can be hard to interpret, the
- five find-methods return a specific item in the sequence. They can scan for
- exact matches, the last item less-than-or-equal to a key, or the first item
- greater-than-or-equal to a key.
-
- Once found, an item's ordinal position can be located with the index() method.
- New items can be added with the insert() and insert_right() methods.
- Old items can be deleted with the remove() method.
-
- The usual sequence methods are provided to support indexing, slicing,
- length lookup, clearing, copying, forward and reverse iteration, contains
- checking, item counts, item removal, and a nice looking repr.
-
- Finding and indexing are O(log n) operations while iteration and insertion
- are O(n). The initial sort is O(n log n).
-
-    The key function is stored in the 'key' attribute for easy introspection or
- so that you can assign a new key function (triggering an automatic re-sort).
-
- In short, the class was designed to handle all of the common use cases for
- bisect but with a simpler API and support for key functions.
-
- >>> from pprint import pprint
- >>> from operator import itemgetter
-
- >>> s = SortedCollection(key=itemgetter(2))
- >>> for record in [
- ... ('roger', 'young', 30),
- ... ('angela', 'jones', 28),
- ... ('bill', 'smith', 22),
- ... ('david', 'thomas', 32)]:
- ... s.insert(record)
-
- >>> pprint(list(s)) # show records sorted by age
- [('bill', 'smith', 22),
- ('angela', 'jones', 28),
- ('roger', 'young', 30),
- ('david', 'thomas', 32)]
-
- >>> s.find_le(29) # find oldest person aged 29 or younger
- ('angela', 'jones', 28)
- >>> s.find_lt(28) # find oldest person under 28
- ('bill', 'smith', 22)
- >>> s.find_gt(28) # find youngest person over 28
- ('roger', 'young', 30)
-
- >>> r = s.find_ge(32) # find youngest person aged 32 or older
- >>> s.index(r) # get the index of their record
- 3
- >>> s[3] # fetch the record at that index
- ('david', 'thomas', 32)
-
- >>> s.key = itemgetter(0) # now sort by first name
- >>> pprint(list(s))
- [('angela', 'jones', 28),
- ('bill', 'smith', 22),
- ('david', 'thomas', 32),
- ('roger', 'young', 30)]
-
- '''
-
- def __init__(self, iterable=(), key=None):
- self._given_key = key
- key = (lambda x: x) if key is None else key
- decorated = sorted((key(item), item) for item in iterable)
- self._keys = [k for k, item in decorated]
- self._items = [item for k, item in decorated]
- self._key = key
-
- def _getkey(self):
- return self._key
-
- def _setkey(self, key):
- if key is not self._key:
- self.__init__(self._items, key=key)
-
- def _delkey(self):
- self._setkey(None)
-
- key = property(_getkey, _setkey, _delkey, 'key function')
-
-
-
-
-
- def __len__(self):
- return len(self._items)
-
- def __getitem__(self, i):
- return self._items[i]
-
- def __iter__(self):
- return iter(self._items)
-
- def __reversed__(self):
- return reversed(self._items)
-
- def __repr__(self):
- return '%s(%r, key=%s)' % (
- self.__class__.__name__,
- self._items,
- getattr(self._given_key, '__name__', repr(self._given_key))
- )
-
- def __reduce__(self):
- return self.__class__, (self._items, self._given_key)
-
- def __contains__(self, item):
- k = self._key(item)
- i = bisect_left(self._keys, k)
- j = bisect_right(self._keys, k)
- return item in self._items[i:j]
-
-[docs] def index(self, item):
- 'Find the position of an item. Raise ValueError if not found.'
- k = self._key(item)
- i = bisect_left(self._keys, k)
- j = bisect_right(self._keys, k)
- return self._items[i:j].index(item) + i
-
-[docs] def count(self, item):
- 'Return number of occurrences of item'
- k = self._key(item)
- i = bisect_left(self._keys, k)
- j = bisect_right(self._keys, k)
- return self._items[i:j].count(item)
-
-[docs] def insert(self, item):
- 'Insert a new item. If equal keys are found, add to the left'
- k = self._key(item)
- i = bisect_left(self._keys, k)
- self._keys.insert(i, k)
- self._items.insert(i, item)
-
-[docs] def insert_right(self, item):
- 'Insert a new item. If equal keys are found, add to the right'
- k = self._key(item)
- i = bisect_right(self._keys, k)
- self._keys.insert(i, k)
- self._items.insert(i, item)
-
-[docs] def remove(self, item):
-        'Remove first occurrence of item. Raise ValueError if not found'
- i = self.index(item)
- del self._keys[i]
- del self._items[i]
-
-[docs] def find(self, k):
- 'Return first item with a key == k. Raise ValueError if not found.'
- i = bisect_left(self._keys, k)
- if i != len(self) and self._keys[i] == k:
- return self._items[i]
- raise ValueError('No item found with key equal to: %r' % (k,))
-
-[docs] def find_le(self, k):
- 'Return last item with a key <= k. Raise ValueError if not found.'
- i = bisect_right(self._keys, k)
- if i:
- return self._items[i-1]
- raise ValueError('No item found with key at or below: %r' % (k,))
-
-[docs] def find_lt(self, k):
- 'Return last item with a key < k. Raise ValueError if not found.'
- i = bisect_left(self._keys, k)
- if i:
- return self._items[i-1]
- raise ValueError('No item found with key below: %r' % (k,))
-
-[docs] def find_ge(self, k):
-        'Return first item with a key >= k. Raise ValueError if not found'
- i = bisect_left(self._keys, k)
- if i != len(self):
- return self._items[i]
- raise ValueError('No item found with key at or above: %r' % (k,))
-
-[docs] def find_gt(self, k):
- 'Return first item with a key > k. Raise ValueError if not found'
- i = bisect_right(self._keys, k)
- if i != len(self):
- return self._items[i]
- raise ValueError('No item found with key above: %r' % (k,))
-
-[docs] def between(self, ge, le):
- g = bisect_left(self._keys, ge)
- l = bisect_right(self._keys, le)
- if g != len(self) and l != len(self):
- return self._items[g : l]
- raise ValueError('No item found between keys : %r,%r' % (ge,le))
-
-[docs] def inside(self, ge, le):
- l = bisect_right(self._keys, le)
- g = bisect_left(self._keys, ge)
- if g != len(self) and l != len(self) and g != l:
- return self._items[g : l]
- elif g != len(self) and l != len(self) and g == l:
- return [ self._items[g] ]
- elif g != len(self):
- return self._items[g-1: l]
- elif l != len(self):
- return self._items[g: l-1]
- else:
- return self._items[g - 1: l - 1]
- raise ValueError('No item found inside keys: %r,%r' % (ge,le))
-
-[docs] def around(self, k):
- g = bisect_right(self._keys, k)
- l = bisect_left(self._keys, k)
- if g != len(self) and l != len(self):
- return self._items[g : l]
- raise ValueError('No item found around key : %r' % (k,))
-
Source code for pyFTS.common.Transformations
-"""
-Common data transformation used on pre and post processing of the FTS
-"""
-
-import numpy as np
-import math
-from pyFTS import *
-
-
-[docs]class Transformation(object):
- """
- Data transformation used on pre and post processing of the FTS
- """
-
- def __init__(self, **kwargs):
- self.is_invertible = True
- self.minimal_length = 1
-
-[docs] def apply(self, data, param, **kwargs):
- """
- Apply the transformation on input data
-
- :param data: input data
- :param param:
- :param kwargs:
- :return: numpy array with transformed data
- """
- pass
-
-[docs] def inverse(self,data, param, **kwargs):
- """
-
- :param data: transformed data
- :param param:
- :param kwargs:
- :return: numpy array with inverse transformed data
- """
- pass
-
- def __str__(self):
- return self.__class__.__name__ + '(' + str(self.parameters) + ')'
-
-
-[docs]class Differential(Transformation):
- """
- Differentiation data transform
- """
- def __init__(self, lag):
- super(Differential, self).__init__()
- self.lag = lag
- self.minimal_length = 2
-
- @property
- def parameters(self):
- return self.lag
-
-[docs] def apply(self, data, param=None, **kwargs):
- if param is not None:
- self.lag = param
-
- if not isinstance(data, (list, np.ndarray, np.generic)):
- data = [data]
-
- if isinstance(data, (np.ndarray, np.generic)):
- data = data.tolist()
-
- n = len(data)
- diff = [data[t] - data[t - self.lag] for t in np.arange(self.lag, n)]
- for t in np.arange(0, self.lag): diff.insert(0, 0)
- return diff
-
-[docs] def inverse(self, data, param, **kwargs):
-
- type = kwargs.get("type","point")
- steps_ahead = kwargs.get("steps_ahead", 1)
-
- if isinstance(data, (np.ndarray, np.generic)):
- data = data.tolist()
-
- if not isinstance(data, list):
- data = [data]
-
- n = len(data)
-
-# print(n)
-# print(len(param))
-
- if steps_ahead == 1:
- if type == "point":
- inc = [data[t] + param[t] for t in np.arange(0, n)]
- elif type == "interval":
- inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
- elif type == "distribution":
- for t in np.arange(0, n):
- data[t].differential_offset(param[t])
- inc = data
- else:
- if type == "point":
- inc = [data[0] + param[0]]
- for t in np.arange(1, steps_ahead):
- inc.append(data[t] + inc[t-1])
- elif type == "interval":
- inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
- for t in np.arange(1, steps_ahead):
- inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
- elif type == "distribution":
- data[0].differential_offset(param[0])
- for t in np.arange(1, steps_ahead):
- ex = data[t-1].expected_value()
- data[t].differential_offset(ex)
- inc = data
-
- if n == 1:
- return inc[0]
- else:
- return inc
-
-
-[docs]class Scale(Transformation):
- """
- Scale data inside a interval [min, max]
-    Scale data inside an interval [min, max]
-
- """
- def __init__(self, min=0, max=1):
- super(Scale, self).__init__()
- self.data_max = None
- self.data_min = None
- self.transf_max = max
- self.transf_min = min
-
- @property
- def parameters(self):
- return [self.transf_max, self.transf_min]
-
-[docs] def apply(self, data, param=None,**kwargs):
- if self.data_max is None:
- self.data_max = np.nanmax(data)
- self.data_min = np.nanmin(data)
- data_range = self.data_max - self.data_min
- transf_range = self.transf_max - self.transf_min
- if isinstance(data, list):
- tmp = [(k + (-1 * self.data_min)) / data_range for k in data]
- tmp2 = [ (k * transf_range) + self.transf_min for k in tmp]
- else:
- tmp = (data + (-1 * self.data_min)) / data_range
- tmp2 = (tmp * transf_range) + self.transf_min
-
- return tmp2
-
-[docs] def inverse(self, data, param, **kwargs):
- data_range = self.data_max - self.data_min
- transf_range = self.transf_max - self.transf_min
- if isinstance(data, list):
- tmp2 = [(k - self.transf_min) / transf_range for k in data]
- tmp = [(k * data_range) + self.data_min for k in tmp2]
- else:
- tmp2 = (data - self.transf_min) / transf_range
- tmp = (tmp2 * data_range) + self.data_min
- return tmp
-
-
-[docs]class AdaptiveExpectation(Transformation):
- """
- Adaptive Expectation post processing
- """
- def __init__(self, parameters):
-        super(AdaptiveExpectation, self).__init__()
-        self.h = parameters
-
-    @property
-    def parameters(self):
-        return self.h
-
-
-
-[docs] def inverse(self, data, param,**kwargs):
- n = len(data)
-
- inc = [param[t] + self.h*(data[t] - param[t]) for t in np.arange(0, n)]
-
- if n == 1:
- return inc[0]
- else:
- return inc
-
-
-[docs]class BoxCox(Transformation):
- """
- Box-Cox power transformation
- """
- def __init__(self, plambda):
- super(BoxCox, self).__init__()
- self.plambda = plambda
-
- @property
- def parameters(self):
- return self.plambda
-
-[docs] def apply(self, data, param=None, **kwargs):
- if self.plambda != 0:
- modified = [(dat ** self.plambda - 1) / self.plambda for dat in data]
- else:
- modified = [np.log(dat) for dat in data]
- return np.array(modified)
-
-[docs] def inverse(self, data, param=None, **kwargs):
- if self.plambda != 0:
-            modified = [np.power(dat * self.plambda + 1, 1. / self.plambda) for dat in data]
- else:
- modified = [np.exp(dat) for dat in data]
- return np.array(modified)
-
-
-[docs]def Z(original):
- mu = np.mean(original)
- sigma = np.std(original)
- z = [(k - mu)/sigma for k in original]
- return z
-
-
-# retrieved from Sadaei and Lee (2014) - Multilayer Stock ForecastingModel Using Fuzzy Time Series
-[docs]def roi(original):
- n = len(original)
- roi = []
- for t in np.arange(0, n-1):
- roi.append( (original[t+1] - original[t])/original[t] )
- return roi
-
-
-
-
-
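A minimal sketch of the Scale and Differential transforms above; the data is illustrative.

from pyFTS.common import Transformations

scale = Transformations.Scale(min=0, max=1)
norm = scale.apply([10, 20, 30, 40])     # [0.0, 0.333..., 0.666..., 1.0]
back = scale.inverse(norm, None)         # [10.0, 20.0, 30.0, 40.0]

diff = Transformations.Differential(1)
print(diff.apply([1, 3, 6, 10]))         # [0, 2, 3, 4] (the first lag is padded with 0)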
Source code for pyFTS.common.Util
-"""
-Common facilities for pyFTS
-"""
-
-import time
-import matplotlib.pyplot as plt
-import dill
-import numpy as np
-import pandas as pd
-import matplotlib.cm as cmx
-import matplotlib.colors as pltcolors
-from pyFTS.probabilistic import ProbabilityDistribution
-from pyFTS.common import Transformations
-
-
-
-
-[docs]def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to, intervals = True,
- save=False, file=None, tam=[20, 5], resolution=None,
- cmap='Blues', linewidth=1.5):
- """
- Plot the forecasts of several one step ahead models, by point or by interval
-
-    :param original: Original time series data (list)
-    :param models: List of models to compare
-    :param colors: List of model colors
-    :param distributions: list of booleans indicating, for each model, whether its forecasted distribution is plotted
-    :param time_from: index of the data point where the ahead forecasting starts
-    :param time_to: number of steps ahead to forecast
-    :param intervals: True to plot the forecasted intervals
-    :param save: Save the picture on file
-    :param file: Filename to save the picture
-    :param tam: Size of the picture
-    :param resolution: resolution (bin width) of the forecasted distributions
-    :param cmap: Color map to be used on distribution plot
-    :param linewidth: matplotlib line width
- :return:
- """
- fig = plt.figure(figsize=tam)
- ax = fig.add_subplot(111)
-
- cm = plt.get_cmap(cmap)
- cNorm = pltcolors.Normalize(vmin=0, vmax=1)
- scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
-
- if resolution is None: resolution = (max(original) - min(original)) / 100
-
- mi = []
- ma = []
-
- for count, fts in enumerate(models, start=0):
- if fts.has_probability_forecasting and distributions[count]:
- density = fts.forecast_ahead_distribution(original[time_from - fts.order:time_from], time_to,
- resolution=resolution)
-
- #plot_density_scatter(ax, cmap, density, fig, resolution, time_from, time_to)
- plot_density_rectange(ax, cm, density, fig, resolution, time_from, time_to)
-
- if fts.has_interval_forecasting and intervals:
- forecasts = fts.forecast_ahead_interval(original[time_from - fts.order:time_from], time_to)
- lower = [kk[0] for kk in forecasts]
- upper = [kk[1] for kk in forecasts]
- mi.append(min(lower))
- ma.append(max(upper))
- for k in np.arange(0, time_from - fts.order):
- lower.insert(0, None)
- upper.insert(0, None)
- ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
- ax.plot(upper, color=colors[count], linewidth=linewidth*1.5)
-
- ax.plot(original, color='black', label="Original", linewidth=linewidth*1.5)
- handles0, labels0 = ax.get_legend_handles_labels()
- if True in distributions:
- lgd = ax.legend(handles0, labels0, loc=2)
- else:
- lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
- _mi = min(mi)
- if _mi < 0:
- _mi *= 1.1
- else:
- _mi *= 0.9
- _ma = max(ma)
- if _ma < 0:
- _ma *= 0.9
- else:
- _ma *= 1.1
-
- ax.set_ylim([_mi, _ma])
- ax.set_ylabel('F(T)')
- ax.set_xlabel('T')
- ax.set_xlim([0, len(original)])
-
- show_and_save_image(fig, file, save, lgd=lgd)
-
-
-
-[docs]def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to):
- """
-    Auxiliary function for plot_compared_intervals_ahead
- """
- from matplotlib.patches import Rectangle
- from matplotlib.collections import PatchCollection
- patches = []
- colors = []
- for x in density.index:
- for y in density.columns:
- s = Rectangle((time_from + x, y), 1, resolution, fill=True, lw = 0)
- patches.append(s)
- colors.append(density[y][x]*5)
- pc = PatchCollection(patches=patches, match_original=True)
- pc.set_clim([0, 1])
- pc.set_cmap(cmap)
- pc.set_array(np.array(colors))
- ax.add_collection(pc)
- cb = fig.colorbar(pc, ax=ax)
- cb.set_label('Density')
-
-
-[docs]def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
- fig = plt.figure(figsize=tam)
- ax = fig.add_subplot(111)
-
- for k,m in enumerate(pmfs,start=0):
- m.plot(ax, color=lcolors[k])
-
- handles0, labels0 = ax.get_legend_handles_labels()
- ax.legend(handles0, labels0)
-
-[docs]def plot_distribution(ax, cmap, probabilitydist, fig, time_from, reference_data=None):
- '''
- Plot forecasted ProbabilityDistribution objects on a matplotlib axis
-
- :param ax: matplotlib axis
- :param cmap: matplotlib colormap name
- :param probabilitydist: list of ProbabilityDistribution objects
- :param fig: matplotlib figure
- :param time_from: starting time (on x axis) to begin the plots
- :param reference_data:
- :return:
- '''
- from matplotlib.patches import Rectangle
- from matplotlib.collections import PatchCollection
- patches = []
- colors = []
- for ct, dt in enumerate(probabilitydist):
- disp = 0.0
- if reference_data is not None:
- disp = reference_data[time_from+ct]
-
- for y in dt.bins:
- s = Rectangle((time_from+ct, y+disp), 1, dt.resolution, fill=True, lw = 0)
- patches.append(s)
- colors.append(dt.density(y))
- scale = Transformations.Scale()
- colors = scale.apply(colors)
- pc = PatchCollection(patches=patches, match_original=True)
- pc.set_clim([0, 1])
- pc.set_cmap(cmap)
- pc.set_array(np.array(colors))
- ax.add_collection(pc)
- cb = fig.colorbar(pc, ax=ax)
- cb.set_label('Density')
-
-
-[docs]def plot_interval(axis, intervals, order, label, color='red', typeonlegend=False, ls='-', linewidth=1):
- '''
- Plot forecasted intervals on matplotlib
-
- :param axis: matplotlib axis
- :param intervals: list of forecasted intervals
- :param order: order of the model that create the forecasts
- :param label: figure label
- :param color: matplotlib color name
- :param typeonlegend:
- :param ls: matplotlib line style
- :param linewidth: matplotlib width
- :return:
- '''
- lower = [kk[0] for kk in intervals]
- upper = [kk[1] for kk in intervals]
- mi = min(lower) * 0.95
- ma = max(upper) * 1.05
- for k in np.arange(0, order):
- lower.insert(0, None)
- upper.insert(0, None)
- if typeonlegend: label += " (Interval)"
- axis.plot(lower, color=color, label=label, ls=ls,linewidth=linewidth)
- axis.plot(upper, color=color, ls=ls,linewidth=linewidth)
- return [mi, ma]
-
-
-[docs]def plot_rules(model, size=[5, 5], axis=None, rules_by_axis=None, columns=1):
- '''
- Plot the FLRG rules of a FTS model on a matplotlib axis
-
- :param model: FTS model
- :param size: figure size
- :param axis: matplotlib axis
- :param rules_by_axis: number of rules plotted by column
- :param columns: number of columns
- :return:
- '''
- if axis is None and rules_by_axis is None:
- rows = 1
- elif axis is None and rules_by_axis is not None:
- rows = (((len(model.flrgs.keys())//rules_by_axis)) // columns)+1
-
- fig, axis = plt.subplots(nrows=rows, ncols=columns, figsize=size)
-
- if rules_by_axis is None:
- draw_sets_on_axis(axis, model, size)
-
- _lhs = model.partitioner.ordered_sets if not model.is_high_order else model.flrgs.keys()
-
- for ct, key in enumerate(_lhs):
-
- xticks = []
- xtickslabels = []
-
- if rules_by_axis is None:
- ax = axis
- else:
- colcount = (ct // rules_by_axis) % columns
- rowcount = (ct // rules_by_axis) // columns
-
- if rows > 1 and columns > 1:
- ax = axis[rowcount, colcount]
- elif columns > 1:
- ax = axis[rowcount]
- else:
- ax = axis
-
- if ct % rules_by_axis == 0:
- draw_sets_on_axis(ax, model, size)
-
- if not model.is_high_order:
- if key in model.flrgs:
- x = (ct % rules_by_axis) + 1
- flrg = model.flrgs[key]
- y = model.sets[key].centroid
- ax.plot([x],[y],'o')
- xticks.append(x)
- xtickslabels.append(key)
- for rhs in flrg.RHS:
- dest = model.sets[rhs].centroid
- ax.arrow(x+.1, y, 0.8, dest - y, #length_includes_head=True,
- head_width=0.1, head_length=0.1, shape='full', overhang=0,
- fc='k', ec='k')
- else:
- flrg = model.flrgs[key]
- x = (ct%rules_by_axis)*model.order + 1
- for ct2, lhs in enumerate(flrg.LHS):
- y = model.sets[lhs].centroid
- ax.plot([x+ct2], [y], 'o')
- xticks.append(x+ct2)
- xtickslabels.append(lhs)
- for ct2 in range(1, model.order):
- fs1 = flrg.LHS[ct2-1]
- fs2 = flrg.LHS[ct2]
- y = model.sets[fs1].centroid
- dest = model.sets[fs2].centroid
- ax.plot([x+ct2-1,x+ct2], [y,dest],'-')
-
- y = model.sets[flrg.LHS[-1]].centroid
- for rhs in flrg.RHS:
- dest = model.sets[rhs].centroid
- ax.arrow(x + model.order -1 + .1, y, 0.8, dest - y, # length_includes_head=True,
- head_width=0.1, head_length=0.1, shape='full', overhang=0,
- fc='k', ec='k')
-
-
- ax.set_xticks(xticks)
- ax.set_xticklabels(xtickslabels)
- ax.set_xlim([0,rules_by_axis*model.order+1])
-
- plt.tight_layout()
- plt.show()
-
-
-[docs]def draw_sets_on_axis(axis, model, size):
- if axis is None:
- fig, axis = plt.subplots(nrows=1, ncols=1, figsize=size)
- for ct, key in enumerate(model.partitioner.ordered_sets):
- fs = model.sets[key]
- axis.plot([0, 1, 0], fs.parameters, label=fs.name)
- axis.axhline(fs.centroid, c="lightgray", alpha=0.5)
- axis.set_xlim([0, len(model.partitioner.ordered_sets)])
- axis.set_xticks(range(0, len(model.partitioner.ordered_sets)))
- tmp = ['']
- tmp.extend(model.partitioner.ordered_sets)
- axis.set_xticklabels(tmp)
- axis.set_ylim([model.partitioner.min, model.partitioner.max])
- axis.set_yticks([model.sets[k].centroid for k in model.partitioner.ordered_sets])
- axis.set_yticklabels([str(round(model.sets[k].centroid, 1)) + " - " + k
- for k in model.partitioner.ordered_sets])
-
-
-current_milli_time = lambda: int(round(time.time() * 1000))
-
-
-[docs]def uniquefilename(name):
- if '.' in name:
- tmp = name.split('.')
- return tmp[0] + str(current_milli_time()) + '.' + tmp[1]
- else:
- return name + str(current_milli_time())
-
-
-
-[docs]def show_and_save_image(fig, file, flag, lgd=None):
- """
-    Show an image and save it to a file
-
- :param fig: Matplotlib Figure object
- :param file: filename to save the picture
- :param flag: if True the image will be saved
- :param lgd: legend
- """
- plt.show()
- if flag:
- if lgd is not None:
- fig.savefig(file, additional_artists=lgd,bbox_inches='tight') #bbox_extra_artists=(lgd,), )
- else:
- fig.savefig(file)
- plt.close(fig)
-
-
-
-
-
-[docs]def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
- """
- Sliding window method of cross validation for time series
-
- :param data: the entire dataset
- :param windowsize: window size
-    :param train: fraction of the window size used for training the models
-    :param inc: fraction of the window size used to slide the window forward
- :return: window count, training set, test set
- """
-
- multivariate = True if isinstance(data, pd.DataFrame) else False
-
- l = len(data) if not multivariate else len(data.index)
- ttrain = int(round(windowsize * train, 0))
- ic = int(round(windowsize * inc, 0))
-
- progressbar = kwargs.get('progress', None)
-
- rng = np.arange(0,l-windowsize+ic,ic)
-
- if progressbar:
- from tqdm import tqdm
- rng = tqdm(rng)
-
- for count in rng:
- if count + windowsize > l:
- _end = l
- else:
- _end = count + windowsize
- if multivariate:
- yield (count, data.iloc[count: count + ttrain], data.iloc[count + ttrain: _end])
- else:
- yield (count, data[count : count + ttrain], data[count + ttrain : _end] )
-
-
-[docs]def persist_obj(obj, file):
- """
-    Persist an object to the filesystem. This function depends on the dill package
-
- :param obj: object on memory
- :param file: file name to store the object
- """
- try:
- with open(file, 'wb') as _file:
- dill.dump(obj, _file)
- except Exception as ex:
- print("File {} could not be saved due exception {}".format(file, ex))
-
-
-[docs]def load_obj(file):
- """
-    Load into memory an object stored on the filesystem. This function depends on the dill package
-
- :param file: file name where the object is stored
- :return: object
- """
- with open(file, 'rb') as _file:
- obj = dill.load(_file)
- return obj
-
-
-[docs]def persist_env(file):
- """
-    Persist an entire environment to a file. This function depends on the dill package
-
- :param file: file name to store the environment
- """
- dill.dump_session(file)
-
-
-
-
-
-
-
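A short sketch of the sliding window cross validation and object persistence helpers above; the synthetic data and the file name are illustrative.

from pyFTS.common import Util

data = list(range(1000))

for ct, train, test in Util.sliding_window(data, windowsize=200, train=0.8, inc=0.25):
    # each window starts at index ct, with 160 points for training and the remaining 40 for testing
    print(ct, len(train), len(test))

Util.persist_obj(data, 'dataset.pkl')    # serialize any Python object (e.g. a trained model) with dill
restored = Util.load_obj('dataset.pkl')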
Source code for pyFTS.common.flrg
-import numpy as np
-
-
-[docs]class FLRG(object):
- """
- Fuzzy Logical Relationship Group
-
-    Groups a set of FLRs with the same LHS. Represents the temporal patterns for time t+1 (the RHS fuzzy sets)
-    when the LHS pattern is identified at time t.
- """
-
- def __init__(self, order, **kwargs):
- self.LHS = None
- """Left Hand Side of the rule"""
- self.RHS = None
- """Right Hand Side of the rule"""
- self.order = order
- """Number of lags on LHS"""
- self.midpoint = None
- self.lower = None
- self.upper = None
- self.key = None
-
-
-
-[docs] def get_key(self):
- """Returns a unique identifier for this FLRG"""
- if self.key is None:
- if isinstance(self.LHS, (list, set)):
- names = [c for c in self.LHS]
- elif isinstance(self.LHS, dict):
- names = [self.LHS[k] for k in self.LHS.keys()]
- else:
- names = [self.LHS]
-
- self.key = ""
-
- for n in names:
- if len(self.key) > 0:
- self.key += ","
- self.key = self.key + n
- return self.key
-
-[docs] def get_membership(self, data, sets):
- """
- Returns the membership value of the FLRG for the input data
-
- :param data: input data
- :param sets: fuzzy sets
- :return: the membership value
- """
- ret = 0.0
- if isinstance(self.LHS, (list, set)):
- if len(self.LHS) == len(data):
- ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
- else:
- ret = sets[self.LHS].membership(data)
- return ret
-
-[docs] def get_midpoint(self, sets):
- """
- Returns the midpoint value for the RHS fuzzy sets
-
- :param sets: fuzzy sets
- :return: the midpoint value
- """
- if self.midpoint is None:
- self.midpoint = np.nanmean(self.get_midpoints(sets))
- return self.midpoint
-
-[docs] def get_midpoints(self, sets):
- if isinstance(self.RHS, (list, set)):
- return np.array([sets[s].centroid for s in self.RHS])
- elif isinstance(self.RHS, dict):
- return np.array([sets[s].centroid for s in self.RHS.keys()])
-
-[docs] def get_lower(self, sets):
- """
- Returns the lower bound value for the RHS fuzzy sets
-
- :param sets: fuzzy sets
- :return: lower bound value
- """
- if self.lower is None:
- if isinstance(self.RHS, list):
- self.lower = min([sets[rhs].lower for rhs in self.RHS])
- elif isinstance(self.RHS, dict):
- self.lower = min([sets[self.RHS[s]].lower for s in self.RHS.keys()])
- return self.lower
-
-[docs] def get_upper(self, sets):
- """
- Returns the upper bound value for the RHS fuzzy sets
-
- :param sets: fuzzy sets
- :return: upper bound value
- """
- if self.upper is None:
- if isinstance(self.RHS, list):
- self.upper = max([sets[rhs].upper for rhs in self.RHS])
- elif isinstance(self.RHS, dict):
- self.upper = max([sets[self.RHS[s]].upper for s in self.RHS.keys()])
- return self.upper
-
- def __len__(self):
- return len(self.RHS)
-
-[docs] def reset_calculated_values(self):
- self.midpoint = None
- self.upper = None
- self.lower = None
-
-
-
Source code for pyFTS.common.fts
-import numpy as np
-import pandas as pd
-from pyFTS.common import FuzzySet, SortedCollection, tree, Util
-
-
-[docs]class FTS(object):
- """
- Fuzzy Time Series object model
- """
- def __init__(self, **kwargs):
- """
- Create a Fuzzy Time Series model
- """
-
- self.sets = {}
- """The list of fuzzy sets used on this model"""
- self.flrgs = {}
- """The list of Fuzzy Logical Relationship Groups - FLRG"""
- self.order = kwargs.get('order',1)
-        """An integer with the model order (the number of past lags used in forecasting)"""
- self.shortname = kwargs.get('name',"")
- """A string with a short name or alias for the model"""
- self.name = kwargs.get('name',"")
- """A string with the model name"""
- self.detail = kwargs.get('name',"")
- """A string with the model detailed information"""
- self.is_wrapper = False
- """Indicates that this model is a wrapper for other(s) method(s)"""
- self.is_high_order = False
-        """A boolean value indicating if the model supports orders greater than 1, default: False"""
- self.min_order = 1
- """In high order models, this integer value indicates the minimal order supported for the model, default: 1"""
- self.has_seasonality = False
- """A boolean value indicating if the model supports seasonal indexers, default: False"""
- self.has_point_forecasting = True
- """A boolean value indicating if the model supports point forecasting, default: True"""
- self.has_interval_forecasting = False
- """A boolean value indicating if the model supports interval forecasting, default: False"""
- self.has_probability_forecasting = False
-        """A boolean value indicating if the model supports probabilistic forecasting, default: False"""
- self.is_multivariate = False
-        """A boolean value indicating if the model supports multivariate time series (Pandas DataFrame), default: False"""
- self.is_clustered = False
-        """A boolean value indicating if the model supports multivariate time series (Pandas DataFrame) but works like
-        a univariate method, default: False"""
- self.dump = False
- self.transformations = []
- """A list with the data transformations (common.Transformations) applied on model pre and post processing, default: []"""
- self.transformations_param = []
- """A list with the specific parameters for each data transformation"""
- self.original_max = 0
- """A float with the upper limit of the Universe of Discourse, the maximal value found on training data"""
- self.original_min = 0
- """A float with the lower limit of the Universe of Discourse, the minimal value found on training data"""
- self.partitioner = kwargs.get("partitioner", None)
-        """A pyFTS.partitioners.Partitioner object with the Universe of Discourse partitioner used on the model. This is a mandatory dependency."""
- if self.partitioner != None:
- self.sets = self.partitioner.sets
- self.auto_update = False
- """A boolean value indicating that model is incremental"""
- self.benchmark_only = False
-        """A boolean value indicating that the model is a façade for an external (non-FTS) method used on benchmarks or ensembles."""
- self.indexer = kwargs.get("indexer", None)
-        """A pyFTS.models.seasonal.Indexer object for indexing the time series data"""
- self.uod_clip = kwargs.get("uod_clip", True)
- """Flag indicating if the test data will be clipped inside the training Universe of Discourse"""
- self.alpha_cut = kwargs.get("alpha_cut", 0.0)
- """A float with the minimal membership to be considered on fuzzyfication process"""
- self.lags = kwargs.get("lags", None)
- """The list of lag indexes for high order models"""
- self.max_lag = self.order
-        """An integer indicating the largest lag used by the model. This value also indicates the minimum number of past lags
- needed to forecast a single step ahead"""
- self.log = pd.DataFrame([],columns=["Datetime","Operation","Value"])
- """"""
-
-[docs] def fuzzy(self, data):
- """
- Fuzzify a data point
-
- :param data: data point
- :return: maximum membership fuzzy set
- """
- best = {"fuzzyset": "", "membership": 0.0}
-
- for f in self.sets:
- fset = self.sets[f]
- if best["membership"] <= fset.membership(data):
- best["fuzzyset"] = fset.name
- best["membership"] = fset.membership(data)
-
- return best
-
-[docs] def predict(self, data, **kwargs):
- """
- Forecast using trained model
-
- :param data: time series with minimal length to the order of the model
-
-        :keyword type: the forecasting type, one of these values: point (default), interval, distribution or multivariate.
- :keyword steps_ahead: The forecasting horizon, i. e., the number of steps ahead to forecast
- :keyword start: in the multi step forecasting, the index of the data where to start forecasting
- :keyword distributed: boolean, indicate if the forecasting procedure will be distributed in a dispy cluster
- :keyword nodes: a list with the dispy cluster nodes addresses
- :keyword explain: try to explain, step by step, the one-step-ahead point forecasting result given the input data.
- :keyword generators: for multivariate methods on multi step ahead forecasting, generators is a dict where the keys
-            are the dataframe column names (except the target_variable) and the values are lambda functions that
- accept one value (the actual value of the variable) and return the next value or trained FTS
- models that accept the actual values and forecast new ones.
-
- :return: a numpy array with the forecasted data
- """
-
- if self.is_multivariate:
- ndata = data
- else:
- ndata = self.apply_transformations(data)
-
- if self.uod_clip:
- ndata = np.clip(ndata, self.original_min, self.original_max)
-
- if 'distributed' in kwargs:
- distributed = kwargs.pop('distributed')
- else:
- distributed = False
-
- if distributed is None or distributed == False:
-
- if 'type' in kwargs:
- type = kwargs.pop("type")
- else:
- type = 'point'
-
- steps_ahead = kwargs.get("steps_ahead", None)
-
- if steps_ahead == None or steps_ahead == 1:
- if type == 'point':
- ret = self.forecast(ndata, **kwargs)
- elif type == 'interval':
- ret = self.forecast_interval(ndata, **kwargs)
- elif type == 'distribution':
- ret = self.forecast_distribution(ndata, **kwargs)
- elif type == 'multivariate':
- ret = self.forecast_multivariate(ndata, **kwargs)
- elif steps_ahead > 1:
- if type == 'point':
- ret = self.forecast_ahead(ndata, steps_ahead, **kwargs)
- elif type == 'interval':
- ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs)
- elif type == 'distribution':
- ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs)
- elif type == 'multivariate':
- ret = self.forecast_ahead_multivariate(ndata, steps_ahead, **kwargs)
-
-        if type not in ['point', 'interval', 'distribution', 'multivariate']:
- raise ValueError('The argument \'type\' has an unknown value.')
-
- else:
-
- if distributed == 'dispy':
- from pyFTS.distributed import dispy
-
- nodes = kwargs.get("nodes", ['127.0.0.1'])
- num_batches = kwargs.get('num_batches', 10)
-
- ret = dispy.distributed_predict(self, kwargs, nodes, ndata, num_batches)
-
- elif distributed == 'spark':
- from pyFTS.distributed import spark
-
- ret = spark.distributed_predict(data=ndata, model=self, **kwargs)
-
-
- if not self.is_multivariate:
- kwargs['type'] = type
- ret = self.apply_inverse_transformations(ret, params=[data[self.max_lag - 1:]], **kwargs)
-
- return ret
-
-[docs] def forecast(self, data, **kwargs):
- """
- Point forecast one step ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param kwargs: model specific parameters
- :return: a list with the forecasted values
- """
-        raise NotImplementedError('This model does not perform one step ahead point forecasts!')
-
-[docs] def forecast_interval(self, data, **kwargs):
- """
- Interval forecast one step ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param kwargs: model specific parameters
- :return: a list with the prediction intervals
- """
-        raise NotImplementedError('This model does not perform one step ahead interval forecasts!')
-
-[docs] def forecast_distribution(self, data, **kwargs):
- """
- Probabilistic forecast one step ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param kwargs: model specific parameters
- :return: a list with probabilistic.ProbabilityDistribution objects representing the forecasted Probability Distributions
- """
-        raise NotImplementedError('This model does not perform one step ahead distribution forecasts!')
-
-[docs] def forecast_multivariate(self, data, **kwargs):
- """
- Multivariate forecast one step ahead
-
- :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
- :param kwargs: model specific parameters
- :return: a Pandas Dataframe object representing the forecasted values for each variable
- """
-        raise NotImplementedError('This model does not perform one step ahead multivariate forecasts!')
-
-[docs] def forecast_ahead(self, data, steps, **kwargs):
- """
- Point forecast n steps ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param steps: the number of steps ahead to forecast
- :keyword start: in the multi step forecasting, the index of the data where to start forecasting
- :return: a list with the forecasted values
- """
-
- if isinstance(data, np.ndarray):
- data = data.tolist()
-
- ret = []
- for k in np.arange(0, steps):
- tmp = self.forecast(data[-self.max_lag:], **kwargs)
-
- if isinstance(tmp,(list, np.ndarray)):
- tmp = tmp[-1]
-
- ret.append(tmp)
- data.append(tmp)
-
- return ret
-
-[docs] def forecast_ahead_interval(self, data, steps, **kwargs):
- """
- Interval forecast n steps ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param steps: the number of steps ahead to forecast
- :param kwargs: model specific parameters
- :return: a list with the forecasted intervals
- """
-        raise NotImplementedError('This model does not perform multi step ahead interval forecasts!')
-
-[docs] def forecast_ahead_distribution(self, data, steps, **kwargs):
- """
- Probabilistic forecast n steps ahead
-
- :param data: time series data with the minimal length equal to the max_lag of the model
- :param steps: the number of steps ahead to forecast
- :param kwargs: model specific parameters
- :return: a list with the forecasted Probability Distributions
- """
-        raise NotImplementedError('This model does not perform multi step ahead distribution forecasts!')
-
-[docs] def forecast_ahead_multivariate(self, data, steps, **kwargs):
- """
- Multivariate forecast n step ahead
-
- :param data: Pandas dataframe with one column for each variable and with the minimal length equal to the max_lag of the model
- :param steps: the number of steps ahead to forecast
- :param kwargs: model specific parameters
- :return: a Pandas Dataframe object representing the forecasted values for each variable
- """
-        raise NotImplementedError('This model does not perform multi step ahead multivariate forecasts!')
-
-[docs] def train(self, data, **kwargs):
- """
- Method specific parameter fitting
-
- :param data: training time series data
- :param kwargs: Method specific parameters
-
- """
- pass
-
-[docs] def fit(self, ndata, **kwargs):
- """
- Fit the model's parameters based on the training data.
-
- :param ndata: training time series data
- :param kwargs:
-
- :keyword num_batches: split the training data in num_batches to save memory during the training process
- :keyword save_model: save final model on disk
- :keyword batch_save: save the model between each batch
- :keyword file_path: path to save the model
- :keyword distributed: boolean, indicate if the training procedure will be distributed in a dispy cluster
- :keyword nodes: a list with the dispy cluster nodes addresses
-
- """
-
- import datetime
-
- if self.is_multivariate:
- data = ndata
- else:
- data = self.apply_transformations(ndata)
-
- self.original_min = np.nanmin(data)
- self.original_max = np.nanmax(data)
-
- if 'sets' in kwargs:
- self.sets = kwargs.pop('sets')
-
- if 'partitioner' in kwargs:
- self.partitioner = kwargs.pop('partitioner')
-
- if not self.is_wrapper:
- if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only and not self.is_multivariate:
- if self.partitioner is not None:
- self.sets = self.partitioner.sets
- else:
- raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ")
-
- if 'order' in kwargs:
- self.order = kwargs.pop('order')
-
- dump = kwargs.get('dump', None)
-
- num_batches = kwargs.get('num_batches', 1)
-
- save = kwargs.get('save_model', False) # save model on disk
-
- batch_save = kwargs.get('batch_save', False) #save model between batches
-
- file_path = kwargs.get('file_path', None)
-
- distributed = kwargs.get('distributed', False)
-
- batch_save_interval = kwargs.get('batch_save_interval', 10)
-
- if distributed is not None and distributed:
-
- if distributed == 'dispy':
- from pyFTS.distributed import dispy
- nodes = kwargs.get('nodes', False)
- train_method = kwargs.get('train_method', dispy.simple_model_train)
- dispy.distributed_train(self, train_method, nodes, type(self), data, num_batches, {},
- batch_save=batch_save, file_path=file_path,
- batch_save_interval=batch_save_interval)
- elif distributed == 'spark':
- from pyFTS.distributed import spark
- url = kwargs.get('url', 'spark://192.168.0.110:7077')
- app = kwargs.get('app', 'pyFTS')
-
- spark.distributed_train(self, data, url=url, app=app)
- else:
-
- if dump == 'time':
- print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now()))
-
- if num_batches is not None:
- n = len(data)
- batch_size = int(n / num_batches)
- bcount = 1
-
- rng = range(self.order, n, batch_size)
-
- if dump == 'tqdm':
- from tqdm import tqdm
-
- rng = tqdm(rng)
-
- for ct in rng:
- if dump == 'time':
- print("[{0: %H:%M:%S}] Starting batch ".format(datetime.datetime.now()) + str(bcount))
- if self.is_multivariate:
- mdata = data.iloc[ct - self.order:ct + batch_size]
- else:
- mdata = data[ct - self.order : ct + batch_size]
-
- self.train(mdata, **kwargs)
-
- if batch_save:
- Util.persist_obj(self,file_path)
-
- if dump == 'time':
- print("[{0: %H:%M:%S}] Finish batch ".format(datetime.datetime.now()) + str(bcount))
-
- bcount += 1
-
- else:
- self.train(data, **kwargs)
-
- if dump == 'time':
- print("[{0: %H:%M:%S}] Finish training".format(datetime.datetime.now()))
-
- if save:
- Util.persist_obj(self, file_path)
-
-
-[docs] def clone_parameters(self, model):
- """
- Import the parameter values from another model
-
- :param model:
- """
-
- self.order = model.order
- self.shortname = model.shortname
- self.name = model.name
- self.detail = model.detail
- self.is_high_order = model.is_high_order
- self.min_order = model.min_order
- self.has_seasonality = model.has_seasonality
- self.has_point_forecasting = model.has_point_forecasting
- self.has_interval_forecasting = model.has_interval_forecasting
- self.has_probability_forecasting = model.has_probability_forecasting
- self.is_multivariate = model.is_multivariate
- self.dump = model.dump
- self.transformations = model.transformations
- self.transformations_param = model.transformations_param
- self.original_max = model.original_max
- self.original_min = model.original_min
- self.partitioner = model.partitioner
- self.sets = model.sets
- self.auto_update = model.auto_update
- self.benchmark_only = model.benchmark_only
- self.indexer = model.indexer
-
-[docs] def append_rule(self, flrg):
- """
- Append FLRG rule to the model
-
- :param flrg: rule
- :return:
- """
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg
- else:
- if isinstance(flrg.RHS, (list, set)):
- for k in flrg.RHS:
- self.flrgs[flrg.get_key()].append_rhs(k)
- elif isinstance(flrg.RHS, dict):
- for key, value in flrg.RHS.items():
- self.flrgs[flrg.get_key()].append_rhs(key, count=value)
- else:
- self.flrgs[flrg.get_key()].append_rhs(flrg.RHS)
-
-[docs] def merge(self, model):
- """
- Merge the FLRG rules from another model
-
- :param model: source model
- :return:
- """
-
- for key, flrg in model.flrgs.items():
- self.append_rule(flrg)
-
-[docs] def append_transformation(self, transformation):
- if transformation is not None:
- self.transformations.append(transformation)
-
-[docs] def apply_transformations(self, data, params=None, updateUoD=False, **kwargs):
- """
- Apply the data transformations for data preprocessing
-
- :param data: input data
- :param params: transformation parameters
- :param updateUoD:
- :param kwargs:
- :return: preprocessed data
- """
-
- ndata = data
- if updateUoD:
- if min(data) < 0:
- self.original_min = min(data) * 1.1
- else:
- self.original_min = min(data) * 0.9
-
- if max(data) > 0:
- self.original_max = max(data) * 1.1
- else:
- self.original_max = max(data) * 0.9
-
- if len(self.transformations) > 0:
- if params is None:
- params = [ None for k in self.transformations]
-
- for c, t in enumerate(self.transformations, start=0):
- ndata = t.apply(ndata,params[c])
-
- return ndata
-
-[docs] def apply_inverse_transformations(self, data, params=None, **kwargs):
- """
- Apply the data transformations for data postprocessing
-
- :param data: input data
- :param params: transformation parameters
- :param kwargs:
- :return: postprocessed data
- """
- if len(self.transformations) > 0:
- if params is None:
- params = [None for k in self.transformations]
-
- ndata = data
- for c, t in enumerate(reversed(self.transformations), start=0):
- ndata = t.inverse(ndata, params[c], **kwargs)
-
- return ndata
- else:
- return data
-
-[docs] def get_UoD(self):
- #return [self.original_min, self.original_max]
- return [self.partitioner.min, self.partitioner.max]
-
- def __str__(self):
- """String representation of the model"""
-
- tmp = self.name + ":\n"
- if self.partitioner.type == 'common':
- for r in sorted(self.flrgs, key=lambda key: self.flrgs[key].get_midpoint(self.partitioner.sets)):
- tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
- else:
- for r in self.flrgs:
- tmp = "{0}{1}\n".format(tmp, str(self.flrgs[r]))
- return tmp
-
- def __len__(self):
- """
- The length (number of rules) of the model
-
- :return: number of rules
- """
- return len(self.flrgs)
-
-[docs] def len_total(self):
- """
- Total length of the model, adding the number of terms in all rules
-
- :return:
- """
- return sum([len(k) for k in self.flrgs])
-
-[docs] def reset_calculated_values(self):
- """
- Reset all pre-calculated values on the FLRG's
-
- :return:
- """
-
- for flrg in self.flrgs.keys():
- self.flrgs[flrg].reset_calculated_values()
-
-
-
-
-
-
-
-
-
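The fit/forecast workflow defined above can be exercised end to end. A minimal sketch, assuming the Grid partitioner and the Enrollments dataset shipped with pyFTS; the model choice and the parameter values below are illustrative, not prescriptive:

from pyFTS.data import Enrollments
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = Enrollments.get_data()

# build the fuzzy sets over the Universe of Discourse and fit a high order model
partitioner = Grid.GridPartitioner(data=data, npart=10)
model = hofts.HighOrderFTS(partitioner=partitioner, order=2)
model.fit(data)

# one step ahead point forecast and a 5 steps ahead forecast
one_step = model.predict(data[-model.max_lag:])
multi_step = model.forecast_ahead(data[-model.max_lag:].tolist(), steps=5)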
Source code for pyFTS.common.tree
-"""
-Tree data structure
-"""
-
-from pyFTS import *
-from functools import reduce
-import numpy as np
-
-
-[docs]class FLRGTreeNode:
- """
- Tree node for the FLRG tree
- """
-
- def __init__(self, value):
- self.isRoot = False
- self.children = []
- self.value = value
-
-
-
-
-
-[docs] def paths(self, acc=[]):
- if len(self.children) == 0:
- yield [self.value] + acc
-
- for child in self.children:
- for leaf_path in child.paths([self.value] + acc): # these two
- yield leaf_path
-
-[docs] def getStr(self, k):
- if self.isRoot:
- tmp = str(self.value)
- else:
- tmp = "\\" + ("-" * k) + str(self.value)
- for child in self.getChildren():
- tmp = tmp + "\n" + child.getStr(k + 1)
- return tmp
-
- def __str__(self):
- return self.getStr(0)
-
-
-[docs]class FLRGTree:
- """Represents a FLRG set with a tree structure"""
- def __init__(self):
- self.root = FLRGTreeNode(None)
-
-
-[docs]def flat(dados):
- for inst in dados:
- if isinstance(inst, (list, tuple)):
- x = flat(inst)
- for k in x:
- yield k
- else:
- yield inst
-
-
-[docs]def build_tree_without_order(node, lags, level):
-
- if level not in lags:
- return
-
- for s in lags[level]:
- node.appendChild(FLRGTreeNode(s))
-
- for child in node.getChildren():
- build_tree_without_order(child, lags, level + 1)
-
Source code for pyFTS.data.AirPassengers
-"""
-Monthly totals of airline passengers in the USA, from January 1949 through December 1960.
-
-Source: Hyndman, R.J., Time Series Data Library, http://www-personal.buseco.monash.edu.au/~hyndman/TSDL/.
-"""
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data():
- """
- Get a simple univariate time series.
-
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat["Passengers"])
- return dat
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('AirPassengers.csv',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/AirPassengers.csv',
- sep=",")
- return dat
-
-
Source code for pyFTS.data.Bitcoin
-"""
-Bitcoin to USD quotations
-
-Daily averaged index, by business day, from 2010 to 2018.
-
-Source: https://finance.yahoo.com/quote/BTC-USD?p=BTC-USD
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='AVG'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("BTCUSD.csv", "https://query.data.world/s/72gews5w3c7oaf7by5vp7evsasluia",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.DowJones
-"""
-DJI - Dow Jones
-
-Daily averaged index, by business day, from 1985 to 2017.
-
-Source: https://finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='AVG'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("DowJones.csv", "https://query.data.world/s/d4hfir3xrelkx33o3bfs5dbhyiztml",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.EURGBP
-"""
-FOREX market EUR-GBP pair.
-
-Daily averaged quotations, by business day, from 2016 to 2018.
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='avg'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("EURGBP.csv", "https://query.data.world/s/gvsaeruthnxjkwzl7z4ki7u5rduah3",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.EURUSD
-"""
-FOREX market EUR-USD pair.
-
-Daily averaged quotations, by business day, from 2016 to 2018.
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='avg'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("EURUSD.csv", "https://query.data.world/s/od4eojioz4w6o5bbwxjfn6j5zoqtos",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.Enrollments
-"""
-Yearly University of Alabama enrollments from 1971 to 1992.
-"""
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data():
- """
- Get a simple univariate time series.
-
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat["Enrollments"])
- return dat
-
-
-[docs]def get_dataframe():
- dat = common.get_dataframe('Enrollments.csv',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/Enrollments.csv',
- sep=";")
- return dat
-
Source code for pyFTS.data.Ethereum
-"""
-Ethereum to USD quotations
-
-Daily averaged index, by business day, from 2016 to 2018.
-
-Source: https://finance.yahoo.com/quote/ETH-USD?p=ETH-USD
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='AVG'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("ETHUSD.csv", "https://query.data.world/s/qj4ly7o4rl7oq527xzy4v76wkr3hws",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.GBPUSD
-"""
-FOREX market GBP-USD pair.
-
-Daily averaged quotations, by business day, from 2016 to 2018.
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='avg'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("GBPUSD.csv", "https://query.data.world/s/sw4mijpowb3mqv6bsat7cdj54hyxix",
- sep=",")
-
- return df
-
-
Source code for pyFTS.data.INMET
-"""
-INMET - Instituto Nacional de Meteorologia, Brasil
-
-Belo Horizonte station, from 2000-01-01 to 2012-12-31
-
-Source: http://www.inmet.gov.br
-"""
-
-from pyFTS.data import common
-import pandas as pd
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('INMET.csv.bz2',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/INMET.csv.bz2',
- sep=";", compression='bz2')
- dat["DataHora"] = pd.to_datetime(dat["DataHora"], format='%d/%m/%Y %H:%M')
- return dat
-
Source code for pyFTS.data.Malaysia
-"""
-Hourly Malaysia electric load and temperature
-"""
-
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field='load'):
- """
- Get the univariate time series data.
-
- :param field: dataset field to load
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat[field])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- df = common.get_dataframe("malaysia.csv","https://query.data.world/s/e5arbthdytod3m7wfcg7gmtluh3wa5",
- sep=";")
-
- return df
-
-
-
Source code for pyFTS.data.NASDAQ
-"""
-National Association of Securities Dealers Automated Quotations - Composite Index (NASDAQ IXIC)
-
-Daily averaged index by business day, from 2000 to 2016.
-
-Source: http://www.nasdaq.com/aspx/flashquotes.aspx?symbol=IXIC&selected=IXIC
-"""
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field="avg"):
- """
- Get a simple univariate time series.
-
- :param field: the dataset field name to extract
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat[field])
- return dat
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('NASDAQ.csv.bz2',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/NASDAQ.csv.bz2',
- sep=",", compression='bz2')
- return dat
-
-
Source code for pyFTS.data.SONDA
-"""
-SONDA - Sistema de Organização Nacional de Dados Ambientais, from INPE - Instituto Nacional de Pesquisas Espaciais, Brasil.
-
-Brasilia station
-
-Source: http://sonda.ccst.inpe.br/
-
-"""
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data(field):
- """
- Get a simple univariate time series.
-
- :param field: the dataset field name to extract
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat[field])
- return dat
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('SONDA_BSB.csv.bz2',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SONDA_BSB.csv.bz2',
- sep=";", compression='bz2')
- dat["datahora"] = pd.to_datetime(dat["datahora"], format='%Y-%m-%d %H:%M:%S')
- return dat
-
-
Source code for pyFTS.data.SP500
-"""
-S&P500 - Standard & Poor's 500
-
-Daily averaged index, by business day, from 1950 to 2017.
-
-Source: https://finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC
-"""
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data():
- """
- Get the univariate time series data.
-
- :return: numpy array
- """
- dat = get_dataframe()
- return np.array(dat["Avg"])
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('SP500.csv.bz2',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/SP500.csv.bz2',
- sep=",", compression='bz2')
- return dat
-
-
Source code for pyFTS.data.TAIEX
-"""
-The Taiwan Stock Exchange Capitalization Weighted Stock Index (TAIEX)
-
-Daily averaged index by business day, from 1995 to 2014.
-
-Source: http://www.twse.com.tw/en/products/indices/Index_Series.php
-"""
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-
-[docs]def get_data():
- """
- Get the univariate time series data.
-
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat["avg"])
- return dat
-
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('TAIEX.csv.bz2',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/TAIEX.csv.bz2',
- sep=",", compression='bz2')
- dat["Date"] = pd.to_datetime(dat["Date"])
- return dat
-
-
Source code for pyFTS.data.artificial
-"""
-Facilities to generate synthetic stochastic processes
-"""
-
-import numpy as np
-
-
-[docs]class SignalEmulator(object):
- """
- Emulate a complex signal built from several additive and non-additive components
- """
-
- def __init__(self, **kwargs):
- super(SignalEmulator, self).__init__()
-
- self.components = []
- """Components of the signal"""
-
-[docs] def stationary_gaussian(self, mu, sigma, **kwargs):
- """
- Creates a continuous Gaussian signal with mean mu and variance sigma.
-
- :param mu: mean
- :param sigma: variance
- :keyword additive: if False, the previous signal is discarded and this one starts anew; if True,
- this signal is added to the previous one
- :keyword start: lag index to start this signal, the default value is 0
- :keyword it: Number of iterations, the default value is 1
- :keyword length: Number of samples generated on each iteration, the default value is 100
- :keyword vmin: Lower bound value of generated data, the default value is None
- :keyword vmax: Upper bound value of generated data, the default value is None
- :return: the current SignalEmulator instance, for method chaining
- """
- parameters = {'mu': mu, 'sigma': sigma}
- self.components.append({'dist': 'gaussian', 'type': 'constant',
- 'parameters': parameters, 'args': kwargs})
- return self
-
-[docs] def incremental_gaussian(self, mu, sigma, **kwargs):
- """
- Creates an additive gaussian interference on a previous signal
-
- :param mu: increment on mean
- :param sigma: increment on variance
- :keyword start: lag index to start this signal, the default value is 0
- :keyword it: Number of iterations, the default value is 1
- :keyword length: Number of samples generated on each iteration, the default value is 100
- :keyword vmin: Lower bound value of generated data, the default value is None
- :keyword vmax: Upper bound value of generated data, the default value is None
- :return: the current SignalEmulator instance, for method chaining
- """
- parameters = {'mu': mu, 'sigma': sigma}
- self.components.append({'dist': 'gaussian', 'type': 'incremental',
- 'parameters': parameters, 'args': kwargs})
- return self
-
-[docs] def periodic_gaussian(self, type, period, mu_min, sigma_min, mu_max, sigma_max, **kwargs):
- """
- Creates an additive periodic gaussian interference on a previous signal
-
- :param type: 'linear' or 'sinoidal'
- :param period: the period of recurrence
- :param mu_min: the initial (and minimum) mean of each period
- :param sigma_min: the initial (and minimum) variance of each period
- :param mu_max: the final (and maximum) mean of each period
- :param sigma_max: the final (and maximum) variance of each period
- :keyword start: lag index to start this signal, the default value is 0
- :keyword it: Number of iterations, the default value is 1
- :keyword length: Number of samples generated on each iteration, the default value is 100
- :keyword vmin: Lower bound value of generated data, the default value is None
- :keyword vmax: Upper bound value of generated data, the default value is None
- :return: the current SignalEmulator instance, for method chaining
- """
- parameters = {'type':type, 'period':period,
- 'mu_min': mu_min, 'sigma_min': sigma_min, 'mu_max': mu_max, 'sigma_max': sigma_max}
- self.components.append({'dist': 'gaussian', 'type': 'periodic',
- 'parameters': parameters, 'args': kwargs})
- return self
-
-[docs] def blip(self, **kwargs):
- """
- Creates an outlier greater than the maximum or lower than the minimum of the previous values of the signal,
- and inserts it at a random location of the signal.
-
- :return: the current SignalEmulator instance, for method chaining
- """
- parameters = {}
- self.components.append({'dist': 'blip', 'type': 'blip',
- 'parameters': parameters, 'args':kwargs})
- return self
-
-[docs] def run(self):
- """
- Render the signal
-
- :return: a list of float values
- """
- signal = []
- last_it = 10
- last_num = 10
- for ct, component in enumerate(self.components):
- parameters = component['parameters']
- kwargs = component['args']
- additive = kwargs.get('additive', True)
- start = kwargs.get('start', 0)
- it = kwargs.get('it', last_it)
- num = kwargs.get('length', last_num)
- vmin = kwargs.get('vmin',None)
- vmax = kwargs.get('vmax', None)
- if component['type'] == 'constant':
- tmp = generate_gaussian_linear(parameters['mu'], parameters['sigma'], 0, 0,
- it=it, num=num, vmin=vmin, vmax=vmax)
- elif component['type'] == 'incremental':
- tmp = generate_gaussian_linear(0, 0, parameters['mu'], parameters['sigma'],
- it=num, num=1, vmin=vmin, vmax=vmax)
- elif component['type'] == 'periodic':
- period = parameters['period']
- mu_min, sigma_min = parameters['mu_min'],parameters['sigma_min']
- mu_max, sigma_max = parameters['mu_max'],parameters['sigma_max']
-
- if parameters['type'] == 'sinoidal':
- tmp = generate_sinoidal_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max,
- it=num, num=1, vmin=vmin, vmax=vmax)
- else:
- tmp = generate_linear_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max,
- it=num, num=1, vmin=vmin, vmax=vmax)
- elif component['type'] == 'blip':
- _mx = np.nanmax(signal)
- _mn = np.nanmin(signal)
-
- _mx += 2*_mx if _mx > 0 else -2*_mx
- _mn += -2*_mn if _mn > 0 else 2*_mn
-
- if vmax is not None:
- _mx = min(_mx, vmax) if vmax > 0 else max(_mx, vmax)
- if vmin is not None:
- _mn = max(_mn, vmin) if vmin > 0 else min(_mn, vmin)
-
- start = np.random.randint(0, len(signal))
- tmp = [_mx] if np.random.rand() >= .5 else [-_mn]
-
- last_num = num
- last_it = it
-
- signal = _append(additive, start, signal, tmp)
-
- return signal
-
-
-
-
-[docs]def generate_gaussian_linear(mu_ini, sigma_ini, mu_inc, sigma_inc, it=100, num=10, vmin=None, vmax=None):
- """
- Generate data sampled from Gaussian distribution, with constant or linear changing parameters
-
- :param mu_ini: Initial mean
- :param sigma_ini: Initial variance
- :param mu_inc: Mean increment after 'num' samples
- :param sigma_inc: Variance increment after 'num' samples
- :param it: Number of iterations
- :param num: Number of samples generated on each iteration
- :param vmin: Lower bound value of generated data
- :param vmax: Upper bound value of generated data
- :return: A list of it*num float values
- """
- mu = mu_ini
- sigma = sigma_ini
- ret = []
- for k in np.arange(0,it):
- tmp = np.random.normal(mu, sigma, num)
- if vmin is not None:
- tmp = np.maximum(np.full(num, vmin), tmp)
- if vmax is not None:
- tmp = np.minimum(np.full(num, vmax), tmp)
- ret.extend(tmp)
- mu += mu_inc
- sigma += sigma_inc
- return ret
-
-
-[docs]def generate_linear_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, it=100, num=10, vmin=None, vmax=None):
- """
- Generates a periodic linear variation on mean and variance
-
- :param period: the period of recurrence
- :param mu_min: initial (and minimum) mean of each period
- :param sigma_min: initial (and minimum) variance of each period
- :param mu_max: final (and maximum) mean of each period
- :param sigma_max: final (and maximum) variance of each period
- :param it: Number of iterations
- :param num: Number of samples generated on each iteration
- :param vmin: Lower bound value of generated data
- :param vmax: Upper bound value of generated data
- :return: A list of it*num float values
- """
-
- if period > it:
- raise("The 'period' parameter must be lesser than 'it' parameter")
-
- mu_inc = (mu_max - mu_min)/period
- sigma_inc = (sigma_max - sigma_min) / period
- mu = mu_min
- sigma = sigma_min
- ret = []
- signal = True
-
- for k in np.arange(0, it):
- tmp = np.random.normal(mu, sigma, num)
- if vmin is not None:
- tmp = np.maximum(np.full(num, vmin), tmp)
- if vmax is not None:
- tmp = np.minimum(np.full(num, vmax), tmp)
- ret.extend(tmp)
-
- if k % period == 0:
- signal = not signal
-
- mu += (mu_inc if signal else -mu_inc)
- sigma += (sigma_inc if signal else -sigma_inc)
-
- sigma = max(sigma, 0.005)
-
- return ret
-
-
-[docs]def generate_sinoidal_periodic_gaussian(period, mu_min, sigma_min, mu_max, sigma_max, it=100, num=10, vmin=None, vmax=None):
- """
- Generates a periodic sinusoidal variation on mean and variance
-
- :param period: the period of recurrence
- :param mu_min: initial (and minimum) mean of each period
- :param sigma_min: initial (and minimum) variance of each period
- :param mu_max: final (and maximum) mean of each period
- :param sigma_max: final (and maximum) variance of each period
- :param it: Number of iterations
- :param num: Number of samples generated on each iteration
- :param vmin: Lower bound value of generated data
- :param vmax: Upper bound value of generated data
- :return: A list of it*num float values
- """
- mu_range = mu_max - mu_min
- sigma_range = sigma_max - sigma_min
- mu = mu_min
- sigma = sigma_min
- ret = []
-
- for k in np.arange(0, it):
- tmp = np.random.normal(mu, sigma, num)
- if vmin is not None:
- tmp = np.maximum(np.full(num, vmin), tmp)
- if vmax is not None:
- tmp = np.minimum(np.full(num, vmax), tmp)
- ret.extend(tmp)
-
- mu += mu_range * np.sin(period * k)
- sigma += sigma_range * np.sin(period * k)
-
- sigma = max(sigma, 0.005)
-
- return ret
-
-
-[docs]def generate_uniform_linear(min_ini, max_ini, min_inc, max_inc, it=100, num=10, vmin=None, vmax=None):
- """
- Generate data sampled from Uniform distribution, with constant or linear changing bounds
-
- :param min_ini: Initial lower bound
- :param max_ini: Initial upper bound
- :param min_inc: Lower bound increment after 'num' samples
- :param max_inc: Upper bound increment after 'num' samples
- :param it: Number of iterations
- :param num: Number of samples generated on each iteration
- :param vmin: Lower bound value of generated data
- :param vmax: Upper bound value of generated data
- :return: A list of it*num float values
- """
- _min = min_ini
- _max = max_ini
- ret = []
- for k in np.arange(0,it):
- tmp = np.random.uniform(_min, _max, num)
- if vmin is not None:
- tmp = np.maximum(np.full(num, vmin), tmp)
- if vmax is not None:
- tmp = np.minimum(np.full(num, vmax), tmp)
- ret.extend(tmp)
- _min += min_inc
- _max += max_inc
- return ret
-
-
-[docs]def white_noise(n=500):
- """
- Simple Gaussian noise signal
- :param n: number of samples
- :return:
- """
- return np.random.normal(0, 1, n)
-
-
-[docs]def random_walk(n=500, type='gaussian'):
- """
- Simple random walk
-
- :param n: number of samples
- :param type: 'gaussian' or 'uniform'
- :return:
- """
- if type == 'gaussian':
- tmp = generate_gaussian_linear(0, 1, 0, 0, it=1, num=n)
- else:
- tmp = generate_uniform_linear(-1, 1, 0, 0, it=1, num=n)
- ret = [0]
- for i in range(n):
- ret.append(tmp[i] + ret[i])
-
- return ret
-
-
-def _append(additive, start, before, new):
- if not additive:
- before.extend(new)
- return before
- else:
- for k in range(start):
- new.insert(0,0)
-
- l1 = len(before)
- l2 = len(new)
-
- if l2 < l1:
- new.extend(np.zeros(l1 - l2).tolist())
- elif 0 < l1 < l2:
- new = new[:l1]
-
- if len(before) == 0:
- tmp = np.array(new)
- else:
- tmp = np.array(before) + np.array(new)
- return tmp.tolist()
-
-
-
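A short sketch of the SignalEmulator chaining API above, composing a stationary component, an incremental trend and a blip outlier; all parameter values are illustrative:

from pyFTS.data import artificial

emulator = artificial.SignalEmulator() \
    .stationary_gaussian(10, 1, length=100, it=5) \
    .incremental_gaussian(0.5, 0, start=200, length=300) \
    .blip()

signal = emulator.run()
print(len(signal))   # 5 iterations x 100 samples from the first component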
Source code for pyFTS.data.common
-
-import pandas as pd
-import numpy as np
-import os
-import pkg_resources
-from pathlib import Path
-from urllib import request
-
-
-[docs]def get_dataframe(filename, url, sep=";", compression='infer'):
- """
- This method checks whether the file already exists locally; if so, it reads the file and returns its data.
- If the file does not exist yet, it is downloaded and decompressed first.
-
- :param filename: dataset local filename
- :param url: dataset internet URL
- :param sep: CSV field separator
- :param compression: type of compression
- :return: Pandas dataset
- """
-
- tmp_file = Path(filename)
-
- if tmp_file.is_file():
- return pd.read_csv(filename, sep=sep, compression=compression)
- else:
- request.urlretrieve(url, filename)
- return pd.read_csv(filename, sep=sep, compression=compression)
-
-
-
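All the dataset modules in pyFTS.data follow the pattern implemented by get_dataframe above: the CSV file is downloaded once and then read from the local copy on subsequent calls, while each module's get_data extracts a single column as a numpy array. A minimal sketch using the TAIEX dataset shown later in this listing:

from pyFTS.data import TAIEX

df = TAIEX.get_dataframe()   # downloads TAIEX.csv.bz2 on the first call, then reuses the local file
y = TAIEX.get_data()         # numpy array with the daily averaged index

print(df.head())
print(y[:5])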
Source code for pyFTS.data.henon
-"""
-M. Hénon. "A two-dimensional mapping with a strange attractor". Commun. Math. Phys. 50, 69-77 (1976)
-
-x(t) = a + b*y(t-1) - x(t-1)^2
-y(t) = x(t-1)
-"""
-
-import numpy as np
-import pandas as pd
-
-
-[docs]def get_data(var, a=1.4, b=0.3, initial_values = [1, 1], iterations=1000):
- """
- Get a simple univariate time series.
-
- :param var: the dataset field name to extract
- :return: numpy array
- """
- return get_dataframe(a,b, initial_values, iterations)[var].values
-
-
-[docs]def get_dataframe(a=1.4, b=0.3, initial_values = [1, 1], iterations=1000):
- '''
- Return a dataframe with the bivariate Henon Map time series (x, y).
-
- :param a: Equation coefficient
- :param b: Equation coefficient
- :param initial_values: numpy array with the initial values of x and y. Default: [1, 1]
- :param iterations: number of iterations. Default: 1000
- :return: Pandas dataframe with the x and y values
- '''
-
- x = [initial_values[0]]
- y = [initial_values[1]]
- for t in np.arange(0, iterations):
- xx = a + b * y[t] - x[t] ** 2
- y.append(x[t])
- x.append(xx)
-
- return pd.DataFrame({'x': x, 'y':y})
-
Source code for pyFTS.data.logistic_map
-"""
-May, Robert M. (1976). "Simple mathematical models with very complicated dynamics".
-Nature. 261 (5560): 459–467. doi:10.1038/261459a0.
-
-x(t) = r * x(t-1) * (1 - x(t -1) )
-"""
-
-import numpy as np
-
-
-[docs]def get_data(r = 4, initial_value = 0.3, iterations=100):
- '''
- Return a list with the logistic map chaotic time series.
-
- :param r: Equation coefficient
- :param initial_value: Initial value of x. Default: 0.3
- :param iterations: number of iterations. Default: 100
- :return:
- '''
- x = [initial_value]
- for t in np.arange(0,iterations):
- x.append(r * x[t]*(1 - x[t]))
-
- return x
-
Source code for pyFTS.data.lorentz
-"""
-Lorenz, Edward Norton (1963). "Deterministic nonperiodic flow". Journal of the Atmospheric Sciences. 20 (2): 130–141.
-https://doi.org/10.1175/1520-0469(1963)020<0130:DNF>2.0.CO;2
-
-dx/dt = a(y -x)
-dy/dt = x(b - z) - y
-dz/dt = xy - cz
-"""
-
-import numpy as np
-import pandas as pd
-
-
-[docs]def get_data(var, a = 10.0, b = 28.0, c = 8.0 / 3.0, dt = 0.01,
- initial_values = [0.1, 0, 0], iterations=1000):
- """
- Get a simple univariate time series.
-
- :param var: the dataset field name to extract
- :return: numpy array
- """
- return get_dataframe(a, b, c, dt, initial_values, iterations)[var].values
-
-
-[docs]def get_dataframe(a = 10.0, b = 28.0, c = 8.0 / 3.0, dt = 0.01,
- initial_values = [0.1, 0, 0], iterations=1000):
- '''
- Return a dataframe with the multivariate Lorenz Map time series (x, y, z).
-
- :param a: Equation coefficient. Default value: 10
- :param b: Equation coefficient. Default value: 28
- :param c: Equation coefficient. Default value: 8.0/3.0
- :param dt: Time differential for continuous time integration. Default value: 0.01
- :param initial_values: numpy array with the initial values of x,y and z. Default: [0.1, 0, 0]
- :param iterations: number of iterations. Default: 1000
- :return: Pandas dataframe with the x, y and z values
- '''
-
- x = [initial_values[0]]
- y = [initial_values[1]]
- z = [initial_values[2]]
-
- for t in np.arange(0, iterations):
- dxdt = a * (y[t] - x[t])
- dydt = x[t] * (b - z[t]) - y[t]
- dzdt = x[t] * y[t] - c * z[t]
- x.append(x[t] + dt * dxdt)
- y.append(y[t] + dt * dydt)
- z.append(z[t] + dt * dzdt)
-
- return pd.DataFrame({'x': x, 'y':y, 'z': z})
-
Source code for pyFTS.data.mackey_glass
-"""
-Mackey, M. C. and Glass, L. (1977). Oscillation and chaos in physiological control systems.
-Science, 197(4300):287-289.
-
-dy/dt = -b*y(t) + c*y(t-tau) / (1 + y(t-tau)^10)
-"""
-
-import numpy as np
-
-
-[docs]def get_data(b=0.1, c=0.2, tau=17, initial_values = np.linspace(0.5,1.5, 18), iterations=1000):
- '''
- Return a list with the Mackey-Glass chaotic time series.
-
- :param b: Equation coefficient
- :param c: Equation coefficient
- :param tau: Lag parameter, default: 17
- :param initial_values: numpy array with the initial values of y. Default: np.linspace(0.5,1.5,18)
- :param iterations: number of iterations. Default: 1000
- :return:
- '''
- y = initial_values.tolist()
-
- for n in np.arange(len(y)-1, iterations+100):
- y.append(y[n] - b * y[n] + c * y[n - tau] / (1 + y[n - tau] ** 10))
-
- return y[100:]
-
Source code for pyFTS.data.rossler
-"""
-O. E. Rössler, Phys. Lett. 57A, 397 (1976).
-
-dx/dt = -z - y
-dy/dt = x + ay
-dz/dt = b + z( x - c )
-
-"""
-
-import numpy as np
-import pandas as pd
-
-
-[docs]def get_data(var, a = 0.2, b = 0.2, c = 5.7, dt = 0.01,
- initial_values = [0.001, 0.001, 0.001], iterations=5000):
- """
- Get a simple univariate time series.
-
- :param var: the dataset field name to extract
- :return: numpy array
- """
- return get_dataframe(a, b, c, dt, initial_values, iterations)[var].values
-
-
-[docs]def get_dataframe(a = 0.2, b = 0.2, c = 5.7, dt = 0.01,
- initial_values = [0.001, 0.001, 0.001], iterations=5000):
- '''
- Return a dataframe with the multivariate Rössler Map time series (x, y, z).
-
- :param a: Equation coefficient. Default value: 0.2
- :param b: Equation coefficient. Default value: 0.2
- :param c: Equation coefficient. Default value: 5.7
- :param dt: Time differential for continuous time integration. Default value: 0.01
- :param initial_values: numpy array with the initial values of x,y and z. Default: [0.001, 0.001, 0.001]
- :param iterations: number of iterations. Default: 5000
- :return: Pandas dataframe with the x, y and z values
- '''
-
- x = [initial_values[0]]
- y = [initial_values[1]]
- z = [initial_values[2]]
-
- for t in np.arange(0, iterations):
- dxdt = - (y[t] + z[t])
- dydt = x[t] + a * y[t]
- dzdt = b + z[t] * x[t] - z[t] * c
- x.append(x[t] + dt * dxdt)
- y.append(y[t] + dt * dydt)
- z.append(z[t] + dt * dzdt)
-
- return pd.DataFrame({'x': x, 'y':y, 'z': z})
-
Source code for pyFTS.data.sunspots
-"""
-Monthly sunspot numbers from 1749 to May 2016
-
-Source: https://www.esrl.noaa.gov/psd/gcos_wgsp/Timeseries/SUNSPOT/
-"""
-
-from pyFTS.data import common
-import pandas as pd
-import numpy as np
-
-[docs]def get_data():
- """
- Get a simple univariate time series.
-
- :return: numpy array
- """
- dat = get_dataframe()
- dat = np.array(dat["SUNACTIVITY"])
- return dat
-
-[docs]def get_dataframe():
- """
- Get the complete multivariate time series data.
-
- :return: Pandas DataFrame
- """
- dat = common.get_dataframe('sunspots.csv',
- 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/sunspots.csv',
- sep=",")
- return dat
-
-
Source code for pyFTS.distributed.spark
-import numpy as np
-import pandas as pd
-
-from pyFTS.data import Enrollments, TAIEX
-from pyFTS.partitioners import Grid, Simple
-from pyFTS.models.multivariate import partitioner as mv_partitioner
-from pyFTS.models import hofts
-
-from pyspark import SparkConf
-from pyspark import SparkContext
-
-import os
-# make sure pyspark tells workers to use python3 not 2 if both are installed
-SPARK_ADDR = 'spark://192.168.0.110:7077'
-
-os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
-os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
-
-[docs]def create_spark_conf(**kwargs):
- spark_executor_memory = kwargs.get("spark_executor_memory", "2g")
- spark_driver_memory = kwargs.get("spark_driver_memory", "2g")
- url = kwargs.get("url", SPARK_ADDR)
- app = kwargs.get("app", 'pyFTS')
-
- conf = SparkConf()
- conf.setMaster(url)
- conf.setAppName(app)
- conf.set("spark.executor.memory", spark_executor_memory)
- conf.set("spark.driver.memory", spark_driver_memory)
- conf.set("spark.memory.offHeap.enabled",True)
- conf.set("spark.memory.offHeap.size","16g")
-
- return conf
-
-[docs]def get_partitioner(shared_partitioner, type='common', variables=[]):
- """
- Rebuild a partitioner from the fuzzy sets shared as a Spark broadcast variable
-
- :param shared_partitioner: Spark broadcast variable holding the fuzzy sets
- :return: a partitioner containing the shared fuzzy sets
- """
- if type=='common':
- fs_tmp = Simple.SimplePartitioner()
-
- for fset in shared_partitioner.value.keys():
- fz = shared_partitioner.value[fset]
- if type=='common':
- fs_tmp.append_complex(fz)
- elif type == 'multivariate':
- fs_tmp.append(fz)
-
- return fs_tmp
-
-
-[docs]def get_clustered_partitioner(explanatory_variables, target_variable, **parameters):
- from pyFTS.models.multivariate.common import MultivariateFuzzySet
- fs_tmp = mv_partitioner.MultivariatePartitioner(explanatory_variables=explanatory_variables,
- target_variable=target_variable)
- for tmp in parameters['partitioner_names'].value:
- fs = MultivariateFuzzySet(target_variable=target_variable)
- for var, fset in parameters['partitioner_{}'.format(tmp)].value:
- fs.append_set(var, fset)
- fs_tmp.append(fs)
-
- fs_tmp.build_index()
-
- return fs_tmp
-
-
-[docs]def get_variables(**parameters):
- explanatory_variables = []
- target_variable = None
- for name in parameters['variables'].value:
- from pyFTS.models.multivariate import common, variable
- var = variable.Variable(name,
- type=parameters['{}_type'.format(name)].value,
- data_label=parameters['{}_label'.format(name)].value,
- alpha_cut=parameters['{}_alpha'.format(name)].value,
- #data_type=parameters['{}_data_type'.format(name)].value,
- #mask=parameters['{}_mask'.format(name)].value,
- )
- var.partitioner = get_partitioner(parameters['{}_partitioner'.format(name)])
- var.partitioner.type = parameters['{}_partitioner_type'.format(name)].value
-
- explanatory_variables.append(var)
-
- if var.name == parameters['target'].value:
- target_variable = var
-
- return (explanatory_variables, target_variable)
-
-[docs]def create_univariate_model(**parameters):
- if parameters['order'].value > 1:
- model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']),
- order=parameters['order'].value, alpha_cut=parameters['alpha_cut'].value,
- lags=parameters['lags'].value)
- else:
- model = parameters['method'].value(partitioner=get_partitioner(parameters['partitioner']),
- alpha_cut=parameters['alpha_cut'].value)
-
- return model
-
-[docs]def slave_train_univariate(data, **parameters):
- """
-
- :param data: a partition of the training data
- :return: a list of (key, FLRG) tuples with the rules learned from this partition
- """
-
- model = create_univariate_model(**parameters)
-
- ndata = [k for k in data]
-
- model.train(ndata)
-
- return [(k, model.flrgs[k]) for k in model.flrgs.keys()]
-
-
-[docs]def slave_forecast_univariate(data, **parameters):
- """
-
- :param data: a partition of the input data
- :return: a list of tuples with the forecasted values
- """
-
- model = create_univariate_model(**parameters)
-
- ndata = [k for k in data]
-
- forecasts = model.predict(ndata)
-
- return [(k, k) for k in forecasts]
-
-
-[docs]def create_multivariate_model(**parameters):
- explanatory_variables, target_variable = get_variables(**parameters)
- #vars = [(v.name, v.name) for v in explanatory_variables]
-
- #return [('vars', vars), ('target',[target_variable.name])]
-
- if parameters['type'].value == 'clustered':
- fs = get_clustered_partitioner(explanatory_variables, target_variable, **parameters)
- model = parameters['method'].value(explanatory_variables=explanatory_variables,
- target_variable=target_variable,
- partitioner=fs,
- order=parameters['order'].value,
- alpha_cut=parameters['alpha_cut'].value,
- lags=parameters['lags'].value)
- else:
-
- if parameters['order'].value > 1:
- model = parameters['method'].value(explanatory_variables=explanatory_variables,
- target_variable=target_variable,
- order=parameters['order'].value,
- alpha_cut=parameters['alpha_cut'].value,
- lags=parameters['lags'].value)
- else:
- model = parameters['method'].value(explanatory_variables=explanatory_variables,
- target_variable=target_variable,
- alpha_cut=parameters['alpha_cut'].value)
-
- return model
-
-
-[docs]def slave_train_multivariate(data, **parameters):
-
- model = create_multivariate_model(**parameters)
-
- rows = [k for k in data]
- ndata = pd.DataFrame.from_records(rows, columns=parameters['columns'].value)
-
- model.train(ndata)
-
- if parameters['type'].value == 'clustered':
- counts = [(fset, count) for fset,count in model.partitioner.count.items()]
- flrgs = [(k, v) for k,v in model.flrgs.items()]
-
- return [('counts', counts), ('flrgs', flrgs)]
- else:
- return [(k, v) for k,v in model.flrgs.items()]
-
-
-[docs]def slave_forecast_multivariate(data, **parameters):
-
- model = create_multivariate_model(**parameters)
-
- rows = [k for k in data]
- ndata = pd.DataFrame.from_records(rows, columns=parameters['columns'].value)
-
- forecasts = model.predict(ndata)
-
- return [(k, k) for k in forecasts]
-
-
-
-
-
-[docs]def distributed_train(model, data, **kwargs):
- """
- Train an FTS model distributing the training data over a Spark cluster
-
- :param model: FTS model to be trained
- :param data: training data
- :keyword url: URL of the Spark master node
- :keyword app: Spark application name
- :keyword num_batches: number of data partitions per worker, default: 4
- :return: the trained model
- """
-
- num_batches = kwargs.get("num_batches", 4)
-
- conf = create_spark_conf(**kwargs)
-
- with SparkContext(conf=conf) as context:
-
- nodes = context.defaultParallelism
-
- parameters = share_parameters(model, context, data)
-
- if not model.is_multivariate:
- func = lambda x: slave_train_univariate(x, **parameters)
-
- flrgs = context.parallelize(data).repartition(nodes*num_batches).mapPartitions(func)
-
- for k in flrgs.collect():
- model.append_rule(k[1])
-
- else:
-
- data = data.to_dict(orient='records')
-
- func = lambda x: slave_train_multivariate(x, **parameters)
-
- flrgs = context.parallelize(data).mapPartitions(func)
-
- for k in flrgs.collect():
- if parameters['type'].value == 'clustered':
- if k[0] == 'counts':
- for fset, count in k[1]:
- model.partitioner.count[fset] = count
- elif k[0] == 'flrgs':
- model.append_rule(k[1])
- else:
- model.append_rule(k[1])
-
- return model
-
-
-[docs]def distributed_predict(data, model, **kwargs):
- """
- Forecast with a trained FTS model, distributing the input data over a Spark cluster
-
- :param data: input data
- :param model: trained FTS model
- :keyword url: URL of the Spark master node
- :keyword app: Spark application name
- :keyword num_batches: number of data partitions per worker, default: 4
- :return: a list with the forecasted values
- """
-
- num_batches = kwargs.get("num_batches", 4)
-
- conf = create_spark_conf(**kwargs)
-
- ret = []
-
- with SparkContext(conf=conf) as context:
-
- nodes = context.defaultParallelism
-
- parameters = share_parameters(model, context)
-
- if not model.is_multivariate:
- func = lambda x: slave_forecast_univariate(x, **parameters)
-
- forecasts = context.parallelize(data).repartition(nodes * num_batches).mapPartitions(func)
-
- else:
-
- data = data.to_dict(orient='records')
-
- func = lambda x: slave_forecast_multivariate(x, **parameters)
-
- forecasts = context.parallelize(data).repartition(nodes * num_batches).mapPartitions(func)
-
- for k in forecasts.collect():
- ret.extend(k)
-
- return ret
-
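A hedged sketch of how the Spark helpers above fit together for a univariate model; it assumes a reachable Spark master (the URL below is a placeholder) and uses illustrative model and partitioning choices:

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.distributed import spark

data = TAIEX.get_data()

partitioner = Grid.GridPartitioner(data=data, npart=35)
model = hofts.HighOrderFTS(partitioner=partitioner, order=2)

# train and forecast distributing the data over the cluster partitions
model = spark.distributed_train(model, data,
                                url='spark://spark-master:7077', app='pyFTS')
forecasts = spark.distributed_predict(data[-100:], model,
                                      url='spark://spark-master:7077', app='pyFTS')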
Source code for pyFTS.hyperparam.Util
-"""
-Common facilities for hyperparameter tuning
-"""
-
-import sqlite3
-
-[docs]def open_hyperparam_db(name):
- """
- Open a connection with a SQLite database designed to store the hyperparameter tuning results.
-
- :param name: database filename
- :return: a sqlite3 database connection
- """
- conn = sqlite3.connect(name)
-
- #performance optimizations
- conn.execute("PRAGMA journal_mode = WAL")
- conn.execute("PRAGMA synchronous = NORMAL")
-
- create_hyperparam_tables(conn)
- return conn
-
-
-[docs]def create_hyperparam_tables(conn):
- """
- Create a sqlite3 table designed to store the hyperparameter tuning results.
-
- :param conn: a sqlite3 database connection
- """
- c = conn.cursor()
-
- c.execute('''CREATE TABLE if not exists hyperparam(
- ID integer primary key, Date int, Dataset text, Tag text,
- Model text, Transformation text, mf text, 'Order' int,
- Partitioner text, Partitions int, alpha real, lags text,
- Measure text, Value real)''')
-
- conn.commit()
-
-
-[docs]def insert_hyperparam(data, conn):
- """
- Insert hyperparameter tuning data into the database
-
- :param data: a tuple with the hyperparameter data, with the following format:
-
- Dataset: identifies on which dataset the tuning was performed
- Tag: a user defined word that identifies a set of experiments
- Model: FTS model
- Transformation: The name of data transformation, if one was used
- mf: membership function
- Order: the order of the FTS method
- Partitioner: UoD partitioning scheme
- Partitions: Number of partitions
- alpha: alpha cut
- lags: lags
- Measure: accuracy measure
- Value: the measure value
-
- :param conn: a sqlite3 database connection
- :return:
- """
- c = conn.cursor()
-
- c.execute("INSERT INTO hyperparam(Date, Dataset, Tag, Model, "
- + "Transformation, mf, 'Order', Partitioner, Partitions, "
- + "alpha, lags, Measure, Value) "
- + "VALUES(datetime('now'),?,?,?,?,?,?,?,?,?,?,?,?)", data)
- conn.commit()
-
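A brief sketch of the database helpers above; the tuple follows the column order documented in insert_hyperparam, and every value in it is illustrative:

from pyFTS.hyperparam import Util as hUtil

conn = hUtil.open_hyperparam_db('hyperparam.db')

# (Dataset, Tag, Model, Transformation, mf, Order, Partitioner, Partitions, alpha, lags, Measure, Value)
row = ('TAIEX', 'grid_search', 'WHOFTS', None, 'trimf', 2,
       'GridPartitioner', 35, 0.0, '[1, 2]', 'rmse', 152.7)
hUtil.insert_hyperparam(row, conn)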
Source code for pyFTS.models.chen
-"""
-First Order Conventional Fuzzy Time Series by Chen (1996)
-
-S.-M. Chen, “Forecasting enrollments based on fuzzy time series,” Fuzzy Sets Syst., vol. 81, no. 3, pp. 311–319, 1996.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-
-
-[docs]class ConventionalFLRG(flrg.FLRG):
- """First Order Conventional Fuzzy Logical Relationship Group"""
- def __init__(self, LHS, **kwargs):
- super(ConventionalFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = set()
-
-
-
-
-
- def __str__(self):
- tmp = str(self.LHS) + " -> "
- tmp2 = ""
- for c in sorted(self.RHS, key=lambda s: s):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + str(c)
- return tmp + tmp2
-
-
-[docs]class ConventionalFTS(fts.FTS):
- """Conventional Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(ConventionalFTS, self).__init__(order=1, **kwargs)
- self.name = "Conventional FTS"
- self.detail = "Chen"
- self.shortname = "CFTS"
- self.flrgs = {}
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
- if flr.LHS in self.flrgs:
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
- else:
- self.flrgs[flr.LHS] = ConventionalFLRG(flr.LHS)
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
-
-[docs] def train(self, data, **kwargs):
-
- tmpdata = self.partitioner.fuzzyfy(data, method='maximum', mode='sets')
- flrs = FLR.generate_non_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- explain = kwargs.get('explain',False)
-
- l = len(ndata) if not explain else 1
-
- ret = []
-
- for k in np.arange(0, l):
-
- actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets)
-
- if explain:
- print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))
-
- if actual.name not in self.flrgs:
- ret.append(actual.centroid)
-
- if explain:
- print("Rules:\n\n {} -> {} (Naïve)\t Midpoint: {} \n\n".format(actual.name, actual.name,actual.centroid))
-
- else:
- _flrg = self.flrgs[actual.name]
-
- mp = _flrg.get_midpoint(self.sets)
-
- ret.append(mp)
-
- if explain:
- print("Rules:\n\n {} \t Midpoint: {} \n".format(str(_flrg), mp))
-
- print("Deffuzyfied value: {} \n".format(mp))
-
- return ret
-
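A minimal sketch of training and using Chen's first order model defined above, on the Enrollments dataset with a Grid partitioner; the number of partitions is illustrative:

from pyFTS.data import Enrollments
from pyFTS.partitioners import Grid
from pyFTS.models import chen

data = Enrollments.get_data()

partitioner = Grid.GridPartitioner(data=data, npart=10)
model = chen.ConventionalFTS(partitioner=partitioner)
model.fit(data)

print(model)                     # prints the learned FLRG rules
forecasts = model.predict(data)  # one point forecast per input value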
Source code for pyFTS.models.cheng
-"""
-Trend Weighted Fuzzy Time Series by Cheng, Chen and Wu (2009)
-
-C.-H. Cheng, Y.-S. Chen, and Y.-L. Wu, “Forecasting innovation diffusion of products using trend-weighted fuzzy time-series model,”
-Expert Syst. Appl., vol. 36, no. 2, pp. 1826–1832, 2009.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts
-from pyFTS.models import yu
-
-
-[docs]class TrendWeightedFLRG(yu.WeightedFLRG):
- """
- First Order Trend Weighted Fuzzy Logical Relationship Group
- """
- def __init__(self, LHS, **kwargs):
- super(TrendWeightedFLRG, self).__init__(LHS, **kwargs)
- self.w = None
-
-[docs] def weights(self, sets):
- if self.w is None:
- count_nochange = 0.0
- count_up = 0.0
- count_down = 0.0
- weights = []
-
- for c in self.RHS:
- tmp = 0
- if sets[self.LHS].centroid == sets[c].centroid:
- count_nochange += 1.0
- tmp = count_nochange
- elif sets[self.LHS].centroid > sets[c].centroid:
- count_down += 1.0
- tmp = count_down
- else:
- count_up += 1.0
- tmp = count_up
- weights.append(tmp)
-
- tot = sum(weights)
- self.w = np.array([k / tot for k in weights])
- return self.w
-
-
-[docs]class TrendWeightedFTS(yu.WeightedFTS):
- """First Order Trend Weighted Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(TrendWeightedFTS, self).__init__(**kwargs)
- self.shortname = "TWFTS"
- self.name = "Trend Weighted FTS"
- self.detail = "Cheng"
- self.is_high_order = False
-
-[docs] def generate_FLRG(self, flrs):
- for flr in flrs:
- if flr.LHS in self.flrgs:
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
- else:
- self.flrgs[flr.LHS] = TrendWeightedFLRG(flr.LHS)
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
-
Source code for pyFTS.models.ensemble.ensemble
-"""
-EnsembleFTS wraps several FTS methods to ensemble their forecasts, providing point,
-interval and probabilistic forecasting.
-
-Silva, P. C. L et al. Probabilistic Forecasting with Seasonal Ensemble Fuzzy Time-Series
-XIII Brazilian Congress on Computational Intelligence, 2017. Rio de Janeiro, Brazil.
-"""
-
-
-import numpy as np
-import pandas as pd
-from pyFTS.common import SortedCollection, fts, tree
-from pyFTS.models import chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
-from pyFTS.probabilistic import ProbabilityDistribution
-import scipy.stats as st
-
-
-[docs]def sampler(data, quantiles):
- ret = []
- for qt in quantiles:
- ret.append(np.nanpercentile(data, q=qt * 100))
- return ret
-
-
-[docs]class EnsembleFTS(fts.FTS):
- """
- Ensemble FTS
- """
- def __init__(self, **kwargs):
- super(EnsembleFTS, self).__init__(**kwargs)
- self.shortname = "EnsembleFTS"
- self.name = "Ensemble FTS"
- self.flrgs = {}
- self.is_wrapper = True
- self.has_point_forecasting = True
- self.has_interval_forecasting = True
- self.has_probability_forecasting = True
- self.is_high_order = True
- self.models = []
- """A list of FTS models, the ensemble components"""
- self.parameters = []
- """A list with the parameters for each component model"""
- self.alpha = kwargs.get("alpha", 0.05)
- """The quantiles """
- self.point_method = kwargs.get('point_method', 'mean')
- """The method used to mix the several model's forecasts into a unique point forecast. Options: mean, median, quantile, exponential"""
- self.interval_method = kwargs.get('interval_method', 'quantile')
- """The method used to mix the several model's forecasts into a interval forecast. Options: quantile, extremum, normal"""
-
-[docs] def append_model(self, model):
- """
- Append a new trained model to the ensemble
-
- :param model: FTS model
-
- """
- self.models.append(model)
- if model.order > self.order:
- self.order = model.order
-
- if model.is_multivariate:
- self.is_multivariate = True
-
- if model.has_seasonality:
- self.has_seasonality = True
-
- if model.original_min < self.original_min:
- self.original_min = model.original_min
- elif model.original_max > self.original_max:
- self.original_max = model.original_max
-
-
-
-[docs] def get_models_forecasts(self,data):
- tmp = []
- for model in self.models:
- if model.is_multivariate or model.has_seasonality:
- forecast = model.forecast(data)
- else:
-
- if isinstance(data, pd.DataFrame) and self.indexer is not None:
- data = self.indexer.get_data(data)
-
- sample = data[-model.order:]
- forecast = model.predict(sample)
- if isinstance(forecast, (list,np.ndarray)) and len(forecast) > 0:
- forecast = forecast[-1]
- elif isinstance(forecast, (list,np.ndarray)) and len(forecast) == 0:
- forecast = np.nan
- if isinstance(forecast, list):
- tmp.extend(forecast)
- else:
- tmp.append(forecast)
- return tmp
-
-[docs] def get_point(self,forecasts, **kwargs):
- if self.point_method == 'mean':
- ret = np.nanmean(forecasts)
- elif self.point_method == 'median':
- ret = np.nanpercentile(forecasts, 50)
- elif self.point_method == 'quantile':
- alpha = kwargs.get("alpha",0.05)
- ret = np.percentile(forecasts, alpha*100)
- elif self.point_method == 'exponential':
- l = len(self.models)
- if l == 1:
- return forecasts[0]
- w = np.array([np.exp(-(l - k)) for k in range(l)])
- w = w / np.nansum(w)
- ret = np.nansum([w[k] * forecasts[k] for k in range(l)])
-
- return ret
-
-[docs] def get_interval(self, forecasts):
- ret = []
- if self.interval_method == 'extremum':
- ret.append([min(forecasts), max(forecasts)])
- elif self.interval_method == 'quantile':
- qt_lo = np.nanpercentile(forecasts, q=self.alpha * 100)
- qt_up = np.nanpercentile(forecasts, q=(1-self.alpha) * 100)
- ret.append([qt_lo, qt_up])
- elif self.interval_method == 'normal':
- mu = np.nanmean(forecasts)
- sigma = np.sqrt(np.nanvar(forecasts))
- ret.append(mu + st.norm.ppf(self.alpha) * sigma)
- ret.append(mu + st.norm.ppf(1 - self.alpha) * sigma)
-
- return ret
-
-[docs] def get_distribution_interquantile(self,forecasts, alpha):
- size = len(forecasts)
- qt_lower = int(np.ceil(size * alpha)) - 1
- qt_upper = int(np.ceil(size * (1- alpha))) - 1
-
- ret = sorted(forecasts)[qt_lower : qt_upper]
-
- return ret
-
-[docs] def forecast(self, data, **kwargs):
-
- if "method" in kwargs:
- self.point_method = kwargs.get('method','mean')
-
- l = len(data)
- ret = []
-
- for k in np.arange(self.order, l+1):
- sample = data[k - self.max_lag : k]
- tmp = self.get_models_forecasts(sample)
- point = self.get_point(tmp)
- ret.append(point)
-
- return ret
-
-[docs] def forecast_interval(self, data, **kwargs):
-
- if "method" in kwargs:
- self.interval_method = kwargs.get('method','quantile')
-
- if 'alpha' in kwargs:
- self.alpha = kwargs.get('alpha',0.05)
-
- l = len(data)
-
- ret = []
-
- for k in np.arange(self.order, l+1):
- sample = data[k - self.order : k]
- tmp = self.get_models_forecasts(sample)
- interval = self.get_interval(tmp)
- if len(interval) == 1:
- interval = interval[-1]
- ret.append(interval)
-
- return ret
-
-[docs] def forecast_ahead_interval(self, data, steps, **kwargs):
-
- if 'method' in kwargs:
- self.interval_method = kwargs.get('method','quantile')
-
- if 'alpha' in kwargs:
- self.alpha = kwargs.get('alpha', self.alpha)
-
- ret = []
-
- samples = [[k] for k in data[-self.order:]]
-
- for k in np.arange(self.order, steps + self.order):
- forecasts = []
- lags = {}
- for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
-
- # Build the tree with all possible paths
-
- root = tree.FLRGTreeNode(None)
-
- tree.build_tree_without_order(root, lags, 0)
-
- for p in root.paths():
- path = list(reversed(list(filter(None.__ne__, p))))
-
- forecasts.extend(self.get_models_forecasts(path))
-
- samples.append(sampler(forecasts, np.arange(0.1, 1, 0.2)))
- interval = self.get_interval(forecasts)
-
- if len(interval) == 1:
- interval = interval[0]
-
- ret.append(interval)
-
- return ret
-
-[docs] def forecast_distribution(self, data, **kwargs):
- ret = []
-
- smooth = kwargs.get("smooth", "KDE")
- alpha = kwargs.get("alpha", None)
-
- uod = self.get_UoD()
-
- for k in np.arange(self.order, len(data)):
-
- sample = data[k-self.order : k]
-
- forecasts = self.get_models_forecasts(sample)
-
- if alpha is None:
- forecasts = np.ravel(forecasts).tolist()
- else:
- forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)
-
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
- name="", **kwargs)
-
- ret.append(dist)
-
- return ret
-
-
-[docs] def forecast_ahead_distribution(self, data, steps, **kwargs):
- if 'method' in kwargs:
- self.point_method = kwargs.get('method','mean')
-
- smooth = kwargs.get("smooth", "KDE")
- alpha = kwargs.get("alpha", None)
-
- ret = []
-
- start = kwargs.get('start', self.order)
-
- uod = self.get_UoD()
-
- sample = data[start - self.order: start]
-
- for k in np.arange(self.order, steps+self.order):
- forecasts = []
- lags = {}
- for i in np.arange(0, self.order): lags[i] = sample[k - self.order + i]
-
- # Build the tree with all possible paths
-
- root = tree.FLRGTreeNode(None)
-
- tree.build_tree_without_order(root, lags, 0)
-
- for p in root.paths():
- path = list(reversed(list(filter(None.__ne__, p))))
-
- forecasts.extend(self.get_models_forecasts(path))
-
- sample.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))
-
- if alpha is None:
- forecasts = np.ravel(forecasts).tolist()
- else:
- forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)
-
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
- name="", **kwargs)
-
- ret.append(dist)
-
- return ret
-
-
-[docs]class AllMethodEnsembleFTS(EnsembleFTS):
- """
- Creates an EnsembleFTS with all point forecast methods, sharing the same partitioner
- """
- def __init__(self, **kwargs):
- super(AllMethodEnsembleFTS, self).__init__(**kwargs)
- self.min_order = 3
- self.shortname = "Ensemble FTS"
-
-[docs] def set_transformations(self, model):
- for t in self.transformations:
- model.append_transformation(t)
-
-[docs] def train(self, data, **kwargs):
- fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
- sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
-
- ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
-
- for method in fo_methods:
- model = method(partitioner=self.partitioner)
- self.set_transformations(model)
- model.fit(data, **kwargs)
- self.append_model(model)
-
- for method in ho_methods:
- for o in np.arange(1, self.order+1):
- model = method(partitioner=self.partitioner)
- if model.min_order >= o:
- model.order = o
- self.set_transformations(model)
- model.fit(data, **kwargs)
- self.append_model(model)
-
-
-
-
-
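A minimal usage sketch for the ensemble class above (hedged: the TAIEX sample dataset, the 35-partition grid and the split points are illustrative; the method/alpha keywords map to the point_method and interval_method attributes read by forecast and forecast_interval above):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models.ensemble import ensemble

data = TAIEX.get_data()
train, test = data[:2000], data[2000:2200]

part = Grid.GridPartitioner(data=train, npart=35)
model = ensemble.AllMethodEnsembleFTS(partitioner=part, order=3)
model.fit(train)

point = model.predict(test, method='median')                              # aggregation by point_method
intervals = model.forecast_interval(test, method='quantile', alpha=0.05)  # interval_method and alpha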
Source code for pyFTS.models.ensemble.multiseasonal
-"""
-Silva, P. C. L et al. Probabilistic Forecasting with Seasonal Ensemble Fuzzy Time-Series
-XIII Brazilian Congress on Computational Intelligence, 2017. Rio de Janeiro, Brazil.
-"""
-
-import numpy as np
-from pyFTS.common import Util as cUtil
-from pyFTS.models.ensemble import ensemble
-from pyFTS.models.seasonal import cmsfts
-from pyFTS.probabilistic import ProbabilityDistribution
-from copy import deepcopy
-from joblib import Parallel, delayed
-import multiprocessing
-
-
-[docs]def train_individual_model(partitioner, train_data, indexer):
- pttr = str(partitioner.__module__).split('.')[-1]
- diff = "_diff" if partitioner.transformation is not None else ""
- _key = "msfts_" + pttr + str(partitioner.partitions) + diff + "_" + indexer.name
-
- print(_key)
-
- model = cmsfts.ContextualMultiSeasonalFTS(_key, indexer=indexer)
- model.append_transformation(partitioner.transformation)
- model.train(train_data, partitioner.sets, order=1)
-
- cUtil.persist_obj(model, "models/"+_key+".pkl")
-
- return model
-
-
-[docs]class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
- def __init__(self, name, **kwargs):
- super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
- self.min_order = 1
- self.indexers = []
- self.partitioners = []
- self.is_multivariate = True
- self.has_seasonality = True
- self.has_probability_forecasting = True
-
-[docs] def update_uod(self, data):
- self.original_max = max(self.indexer.get_data(data))
- self.original_min = min(self.indexer.get_data(data))
-
-[docs] def train(self, data, **kwargs):
- self.original_max = max(self.indexer.get_data(data))
- self.original_min = min(self.indexer.get_data(data))
-
- num_cores = multiprocessing.cpu_count()
-
- pool = {}
- count = 0
- for ix in self.indexers:
- for pt in self.partitioners:
- pool[count] = {'ix': ix, 'pt': pt}
- count += 1
-
- results = Parallel(n_jobs=num_cores)(
- delayed(train_individual_model)(deepcopy(pool[m]['pt']), data, deepcopy(pool[m]['ix']))
- for m in pool.keys())
-
- for tmp in results:
- self.append_model(tmp)
-
- cUtil.persist_obj(self, "models/"+self.name+".pkl")
-
-[docs] def forecast_distribution(self, data, **kwargs):
-
- ret = []
-
- smooth = kwargs.get("smooth", "KDE")
- alpha = kwargs.get("alpha", None)
-
- uod = self.get_UoD()
-
- for k in data.index:
-
- tmp = self.get_models_forecasts(data.ix[k])
-
- if alpha is None:
- tmp = np.ravel(tmp).tolist()
- else:
- tmp = self.get_distribution_interquantile( np.ravel(tmp).tolist(), alpha)
-
- name = str(self.indexer.get_index(data.ix[k]))
-
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=tmp,
- name=name, **kwargs)
-
- ret.append(dist)
-
- return ret
-
Source code for pyFTS.models.hofts
-"""
-High Order FTS
-
-Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting
-using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI: 10.1109/FUZZ-IEEE.2017.8015732
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-from itertools import product
-
-
-[docs]class HighOrderFLRG(flrg.FLRG):
- """Conventional High Order Fuzzy Logical Relationship Group"""
- def __init__(self, order, **kwargs):
- super(HighOrderFLRG, self).__init__(order, **kwargs)
- self.LHS = []
- self.RHS = {}
- self.strlhs = ""
-
-
-
-
-
- def __str__(self):
- tmp = ""
- for c in sorted(self.RHS):
- if len(tmp) > 0:
- tmp = tmp + ","
- tmp = tmp + c
- return self.get_key() + " -> " + tmp
-
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class WeightedHighOrderFLRG(flrg.FLRG):
- """Weighted High Order Fuzzy Logical Relationship Group"""
-
- def __init__(self, order, **kwargs):
- super(WeightedHighOrderFLRG, self).__init__(order, **kwargs)
- self.LHS = []
- self.RHS = {}
- self.count = 0.0
- self.strlhs = ""
- self.w = None
-
-[docs] def append_rhs(self, fset, **kwargs):
- count = kwargs.get('count',1.0)
- if fset not in self.RHS:
- self.RHS[fset] = count
- else:
- self.RHS[fset] += count
- self.count += count
-
-
-
-[docs] def weights(self):
- if self.w is None:
- self.w = np.array([self.RHS[c] / self.count for c in self.RHS.keys()])
- return self.w
-
-[docs] def get_midpoint(self, sets):
- if self.midpoint is None:
- mp = np.array([sets[c].centroid for c in self.RHS.keys()])
- self.midpoint = mp.dot(self.weights())
-
- return self.midpoint
-
-[docs] def get_lower(self, sets):
- if self.lower is None:
- lw = np.array([sets[s].lower for s in self.RHS.keys()])
- self.lower = lw.dot(self.weights())
- return self.lower
-
-[docs] def get_upper(self, sets):
- if self.upper is None:
- up = np.array([sets[s].upper for s in self.RHS.keys()])
- self.upper = up.dot(self.weights())
- return self.upper
-
- def __str__(self):
- _str = ""
- for k in self.RHS.keys():
- _str += ", " if len(_str) > 0 else ""
- _str += k + " (" + str(round(self.RHS[k] / self.count, 3)) + ")"
-
- return self.get_key() + " -> " + _str
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class HighOrderFTS(fts.FTS):
- """Conventional High Order Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(HighOrderFTS, self).__init__(**kwargs)
- self.name = "High Order FTS"
- self.shortname = "HOFTS"
- self.detail = "Severiano, Silva, Sadaei and Guimarães"
- self.is_high_order = True
- self.min_order = 1
- self.order = kwargs.get("order", self.min_order)
- self.configure_lags(**kwargs)
-
-[docs] def configure_lags(self, **kwargs):
- if "order" in kwargs:
- self.order = kwargs.get("order", self.min_order)
-
- if "lags" in kwargs:
- self.lags = kwargs.get("lags", None)
-
- if self.lags is not None:
- self.max_lag = max(self.lags)
- else:
- self.max_lag = self.order
- self.lags = np.arange(1, self.order+1)
-
-[docs] def generate_lhs_flrg(self, sample, explain=False):
-
- nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
- for k in sample]
-
- if explain:
- self.append_log("Fuzzyfication","{} -> {}".format(sample, nsample))
-
- return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
-
-[docs] def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
- lags = []
- flrgs = []
-
- for ct, o in enumerate(self.lags):
- lhs = sample[o - 1]
- lags.append(lhs)
-
- if explain:
- self.append_log("Ordering Lags", "Lag {} Value {}".format(o, lhs))
-
- # Trace the possible paths
- for path in product(*lags):
- flrg = HighOrderFLRG(self.order)
-
- for lhs in path:
- flrg.append_lhs(lhs)
-
- flrgs.append(flrg)
-
- return flrgs
-
-[docs] def generate_flrg(self, data):
- l = len(data)
- for k in np.arange(self.max_lag, l):
-
- if self.dump: print("FLR: " + str(k))
-
- sample = data[k - self.max_lag: k]
-
- rhs = self.partitioner.fuzzyfy(data[k], mode="sets", alpha_cut=self.alpha_cut)
-
- flrgs = self.generate_lhs_flrg(sample)
-
- for flrg in flrgs:
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg
-
- for st in rhs:
- self.flrgs[flrg.get_key()].append_rhs(st)
-
-
-[docs] def generate_flrg_fuzzyfied(self, data):
- l = len(data)
- for k in np.arange(self.max_lag, l):
- if self.dump: print("FLR: " + str(k))
-
- sample = data[k - self.max_lag: k]
-
- rhs = data[k]
-
- flrgs = self.generate_lhs_flrg_fuzzyfied(sample)
-
- for flrg in flrgs:
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg
-
- for st in rhs:
- self.flrgs[flrg.get_key()].append_rhs(st)
-
-[docs] def train(self, data, **kwargs):
- self.configure_lags(**kwargs)
- if not kwargs.get('fuzzyfied',False):
- self.generate_flrg(data)
- else:
- self.generate_flrg_fuzzyfied(data)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- explain = kwargs.get('explain', False)
-
- fuzzyfied = kwargs.get('fuzzyfied', False)
-
- mode = kwargs.get('mode', 'mean')
-
- ret = []
-
- l = len(ndata) if not explain else self.max_lag + 1
-
- if l < self.max_lag:
- return ndata
- elif l == self.max_lag:
- l += 1
-
- for k in np.arange(self.max_lag, l):
-
- sample = ndata[k - self.max_lag: k]
-
- if not fuzzyfied:
- flrgs = self.generate_lhs_flrg(sample, explain)
- else:
- flrgs = self.generate_lhs_flrg_fuzzyfied(sample, explain)
-
- midpoints = []
- memberships = []
- for flrg in flrgs:
-
- if flrg.get_key() not in self.flrgs:
- if len(flrg.LHS) > 0:
- mp = self.partitioner.sets[flrg.LHS[-1]].centroid
- mv = self.partitioner.sets[flrg.LHS[-1]].membership(sample[-1]) if not fuzzyfied else None
- midpoints.append(mp)
- memberships.append(mv)
-
- if explain:
- self.append_log("Rule Matching", "{} -> {} (Naïve) Midpoint: {}".format(str(flrg.LHS), flrg.LHS[-1],
- mp))
- else:
- flrg = self.flrgs[flrg.get_key()]
- mp = flrg.get_midpoint(self.partitioner.sets)
- mv = flrg.get_membership(sample, self.partitioner.sets) if not fuzzyfied else None
- midpoints.append(mp)
- memberships.append(mv)
-
- if explain:
- self.append_log("Rule Matching", "{}, Midpoint: {} Membership: {}".format(flrg.get_key(), mp, mv))
-
- if mode == "mean" or fuzzyfied:
- final = np.nanmean(midpoints)
- if explain: self.append_log("Deffuzyfication", "By Mean: {}".format(final))
- else:
- final = np.dot(midpoints, memberships)/np.nansum(memberships)
- if explain: self.append_log("Deffuzyfication", "By Memberships: {}".format(final))
-
- ret.append(final)
-
- return ret
-
-
-[docs]class WeightedHighOrderFTS(HighOrderFTS):
- """Weighted High Order Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(WeightedHighOrderFTS, self).__init__(**kwargs)
- self.name = "Weighted High Order FTS"
- self.shortname = "WHOFTS"
-
-[docs] def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
- lags = []
- flrgs = []
-
- for ct, o in enumerate(self.lags):
- lags.append(sample[o-1])
-
- if explain:
- print("\t (Lag {}) {} \n".format(o, sample[o-1]))
-
- # Trace the possible paths
- for path in product(*lags):
- flrg = WeightedHighOrderFLRG(self.order)
-
- for lhs in path:
- flrg.append_lhs(lhs)
-
- flrgs.append(flrg)
-
- return flrgs
-
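A short usage sketch for the high order models above (the Enrollments dataset and the 10-partition grid are illustrative choices):

from pyFTS.data import Enrollments
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

data = Enrollments.get_data()
part = Grid.GridPartitioner(data=data, npart=10)

model = hofts.WeightedHighOrderFTS(partitioner=part, order=2)
model.fit(data)

print(model)                       # lists the weighted high order rules (FLRGs)
forecasts = model.predict(data)    # one-step-ahead point forecasts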
Source code for pyFTS.models.hwang
-"""
-High Order Fuzzy Time Series by Hwang, Chen and Lee (1998)
-
-Jeng-Ren Hwang, Shyi-Ming Chen, and Chia-Hoang Lee, “Handling forecasting problems using fuzzy time series,”
-Fuzzy Sets Syst., no. 100, pp. 217–228, 1998.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, Transformations, fts
-
-
-[docs]class HighOrderFTS(fts.FTS):
- def __init__(self, **kwargs):
- super(HighOrderFTS, self).__init__(**kwargs)
- self.is_high_order = True
- self.min_order = 2
- self.name = "Hwang High Order FTS"
- self.shortname = "Hwang"
- self.detail = "Hwang"
- self.configure_lags(**kwargs)
-
-[docs] def configure_lags(self, **kwargs):
- if "order" in kwargs:
- self.order = kwargs.get("order", 2)
-
- self.max_lag = self.order
-
-[docs] def forecast(self, ndata, **kwargs):
-
- l = len(self.sets)
-
- cn = np.array([0.0 for k in range(l)])
- ow = np.array([[0.0 for k in range(l)] for z in range(self.order - 1)])
- rn = np.array([[0.0 for k in range(l)] for z in range(self.order - 1)])
- ft = np.array([0.0 for k in range(l)])
-
- ret = []
-
- for t in np.arange(self.order-1, len(ndata)):
-
- for ix in range(l):
- s = self.partitioner.ordered_sets[ix]
- cn[ix] = self.sets[s].membership( FuzzySet.grant_bounds(ndata[t], self.sets, self.partitioner.ordered_sets))
- for w in np.arange(self.order-1):
- ow[w, ix] = self.sets[s].membership(FuzzySet.grant_bounds(ndata[t - w], self.sets, self.partitioner.ordered_sets))
- rn[w, ix] = ow[w, ix] * cn[ix]
- ft[ix] = max(ft[ix], rn[w, ix])
- mft = max(ft)
- out = 0.0
- count = 0.0
- for ix in range(l):
- s = self.partitioner.ordered_sets[ix]
- if ft[ix] == mft:
- out = out + self.sets[s].centroid
- count += 1.0
- ret.append(out / count)
-
- return ret
-
-[docs] def train(self, data, **kwargs):
-
- if self.sets is None:
- self.sets = self.partitioner.sets
-
- self.configure_lags(**kwargs)
-
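An analogous sketch for the Hwang model (same illustrative data and grid; note its minimum order is 2):

from pyFTS.data import Enrollments
from pyFTS.partitioners import Grid
from pyFTS.models import hwang

data = Enrollments.get_data()
part = Grid.GridPartitioner(data=data, npart=10)

model = hwang.HighOrderFTS(partitioner=part, order=2)
model.fit(data)
forecasts = model.predict(data)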
Source code for pyFTS.models.ifts
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-"""
-High Order Interval Fuzzy Time Series
-
-SILVA, Petrônio CL; SADAEI, Hossein Javedani; GUIMARÃES, Frederico Gadelha. Interval Forecasting with Fuzzy Time Series.
-In: Computational Intelligence (SSCI), 2016 IEEE Symposium Series on. IEEE, 2016. p. 1-8.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, tree
-from pyFTS.models import hofts
-
-
-[docs]class IntervalFTS(hofts.WeightedHighOrderFTS):
- """
- High Order Interval Fuzzy Time Series
- """
- def __init__(self, **kwargs):
- super(IntervalFTS, self).__init__(**kwargs)
- self.shortname = "IFTS"
- self.name = "Interval FTS"
- self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
- self.flrgs = {}
- self.has_point_forecasting = False
- self.has_interval_forecasting = True
- self.is_high_order = True
- self.min_order = 1
-
-[docs] def get_upper(self, flrg):
- ret = np.nan
- if len(flrg.LHS) > 0:
- if flrg.get_key() in self.flrgs:
- tmp = self.flrgs[flrg.get_key()]
- ret = tmp.get_upper(self.sets)
- else:
- ret = self.sets[flrg.LHS[-1]].upper
- return ret
-
-[docs] def get_lower(self, flrg):
- ret = np.nan
- if len(flrg.LHS) > 0:
- if flrg.get_key() in self.flrgs:
- tmp = self.flrgs[flrg.get_key()]
- ret = tmp.get_lower(self.partitioner.sets)
- else:
- ret = self.partitioner.sets[flrg.LHS[-1]].lower
- return ret
-
-[docs] def get_sequence_membership(self, data, fuzzySets):
- mb = [fuzzySets[k].membership(data[k]) for k in np.arange(0, len(data))]
- return mb
-
-
-[docs] def forecast_interval(self, ndata, **kwargs):
-
- ret = []
-
- l = len(ndata)
-
- if l <= self.order:
- return ndata
-
- for k in np.arange(self.max_lag, l+1):
-
- sample = ndata[k - self.max_lag: k]
-
- flrgs = self.generate_lhs_flrg(sample)
-
- up = []
- lo = []
- affected_flrgs_memberships = []
-
- for flrg in flrgs:
- if len(flrg.LHS) > 0:
-
- mv = flrg.get_membership(sample, self.sets)
- up.append(mv * self.get_upper(flrg))
- lo.append(mv * self.get_lower(flrg))
- affected_flrgs_memberships.append(mv)
-
- # compute the interval bounds
- norm = sum(affected_flrgs_memberships)
- lo_ = sum(lo) / norm
- up_ = sum(up) / norm
- ret.append([lo_, up_])
-
- return ret
-
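A sketch of interval forecasting with IFTS (dataset, split points and partition count are arbitrary):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import ifts

data = TAIEX.get_data()
train, test = data[:2000], data[2000:2100]

part = Grid.GridPartitioner(data=train, npart=35)
model = ifts.IntervalFTS(partitioner=part, order=2)
model.fit(train)

intervals = model.forecast_interval(test)   # list of [lower, upper] pairs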
Source code for pyFTS.models.incremental.IncrementalEnsemble
-'''
-Time Variant/Incremental Ensemble of FTS methods
-'''
-
-
-import numpy as np
-import pandas as pd
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-from pyFTS.partitioners import Grid
-from pyFTS.models import hofts
-from pyFTS.models.ensemble import ensemble
-
-
-[docs]class IncrementalEnsembleFTS(ensemble.EnsembleFTS):
- """
- Time Variant/Incremental Ensemble of FTS methods
- """
- def __init__(self, **kwargs):
- super(IncrementalEnsembleFTS, self).__init__(**kwargs)
- self.shortname = "IncrementalEnsembleFTS"
- self.name = "Incremental Ensemble FTS"
-
- self.order = kwargs.get('order',1)
-
- self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner)
- """The partitioner method to be called when a new model is build"""
- self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10})
- """The partitioner method parameters"""
-
- self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
- """The FTS method to be called when a new model is build"""
- self.fts_params = kwargs.get('fts_params', {})
- """The FTS method specific parameters"""
-
- self.window_length = kwargs.get('window_length', 100)
- """The memory window length"""
-
- self.batch_size = kwargs.get('batch_size', 10)
- """The batch interval between each retraining"""
-
- self.num_models = kwargs.get('num_models', 5)
- """The number of models to hold in the ensemble"""
-
- self.point_method = kwargs.get('point_method', 'exponential')
-
- self.is_high_order = True
- self.uod_clip = False
- self.max_lag = self.window_length + self.order
-
-[docs] def train(self, data, **kwargs):
-
- partitioner = self.partitioner_method(data=data, **self.partitioner_params)
- model = self.fts_method(partitioner=partitioner, **self.fts_params)
- if model.is_high_order:
- model = self.fts_method(partitioner=partitioner, order=self.order, **self.fts_params)
- model.fit(data, **kwargs)
- self.append_model(model)
- if len(self.models) > self.num_models:
- self.models.pop(0)
-
-[docs] def forecast(self, data, **kwargs):
- l = len(data)
-
- data_window = []
-
- ret = []
-
- for k in np.arange(self.max_lag, l):
-
- k2 = k - self.max_lag
-
- data_window.append(data[k2])
-
- if k2 >= self.window_length:
- data_window.pop(0)
-
- if k % self.batch_size == 0 and k2 >= self.window_length:
- self.train(data_window, **kwargs)
-
- if len(self.models) > 0:
- sample = data[k2: k]
- tmp = self.get_models_forecasts(sample)
- point = self.get_point(tmp)
- ret.append(point)
-
- return ret
-
-
-
-
-
-
-
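A usage sketch for the incremental ensemble (window, batch and partition sizes are illustrative only):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.models.incremental import IncrementalEnsemble

data = TAIEX.get_data()

model = IncrementalEnsemble.IncrementalEnsembleFTS(
    partitioner_method=Grid.GridPartitioner, partitioner_params={'npart': 35},
    fts_method=hofts.WeightedHighOrderFTS, order=2,
    window_length=200, batch_size=50, num_models=5)

model.fit(data[:1000])                      # trains the first member model
forecasts = model.predict(data[1000:2000])  # keeps retraining while forecasting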
Source code for pyFTS.models.incremental.TimeVariant
-"""
-Meta model that wraps another FTS method and continuously retrains it on a sliding data window containing the most recent data
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-from pyFTS.partitioners import Grid
-
-
-[docs]class Retrainer(fts.FTS):
- """
- Meta model for incremental/online learning
- """
- def __init__(self, **kwargs):
- super(Retrainer, self).__init__(**kwargs)
-
- self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner)
- """The partitioner method to be called when a new model is build"""
- self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10})
- """The partitioner method parameters"""
- self.partitioner = None
- """The most recent trained partitioner"""
-
- self.fts_method = kwargs.get('fts_method', None)
- """The FTS method to be called when a new model is build"""
- self.fts_params = kwargs.get('fts_params', {})
- """The FTS method specific parameters"""
- self.model = None
- """The most recent trained model"""
-
- self.window_length = kwargs.get('window_length',100)
- """The memory window length"""
- self.auto_update = False
- """If true the model is updated at each time and not recreated"""
- self.batch_size = kwargs.get('batch_size', 10)
- """The batch interval between each retraining"""
- self.is_high_order = True
- self.uod_clip = False
- self.max_lag = self.window_length + self.order
- self.is_wrapper = True
-
-[docs] def train(self, data, **kwargs):
- self.partitioner = self.partitioner_method(data=data, **self.partitioner_params)
- self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
- if self.model.is_high_order:
- self.model = self.fts_method(partitioner=self.partitioner,
- order=self.order, **self.fts_params)
- self.model.fit(data, **kwargs)
- self.shortname = self.model.shortname
-
-[docs] def forecast(self, data, **kwargs):
- l = len(data)
-
- horizon = self.window_length + self.order
-
- ret = []
-
- for k in np.arange(horizon, l+1):
- _train = data[k - horizon: k - self.order]
- _test = data[k - self.order: k]
-
- if k % self.batch_size == 0 or self.model is None:
- if self.auto_update:
- self.model.train(_train)
- else:
- self.train(_train, **kwargs)
-
- ret.extend(self.model.predict(_test, **kwargs))
-
- return ret
-
- def __str__(self):
- """String representation of the model"""
-
- return str(self.model)
-
- def __len__(self):
- """
- The length (number of rules) of the model
-
- :return: number of rules
- """
- return len(self.model)
-
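And a matching sketch for the Retrainer wrapper (again, all sizes are arbitrary choices):

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.models.incremental import TimeVariant

data = TAIEX.get_data()

model = TimeVariant.Retrainer(
    partitioner_method=Grid.GridPartitioner, partitioner_params={'npart': 35},
    fts_method=hofts.WeightedHighOrderFTS, order=2,
    window_length=300, batch_size=100)

model.fit(data[:500])                     # trains the first wrapped model
forecasts = model.predict(data[500:2000])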
Source code for pyFTS.models.ismailefendi
-"""
-First Order Improved Weighted Fuzzy Time Series by Efendi, Ismail and Deris (2013)
-
-R. Efendi, Z. Ismail, and M. M. Deris, “Improved weight Fuzzy Time Series as used in the exchange rates forecasting of
-US Dollar to Ringgit Malaysia,” Int. J. Comput. Intell. Appl., vol. 12, no. 1, p. 1350005, 2013.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-
-
-[docs]class ImprovedWeightedFLRG(flrg.FLRG):
- """First Order Improved Weighted Fuzzy Logical Relationship Group"""
- def __init__(self, LHS, **kwargs):
- super(ImprovedWeightedFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = {}
- self.rhs_counts = {}
- self.count = 0.0
- self.w = None
-
-[docs] def append_rhs(self, c, **kwargs):
- count = kwargs.get('count', 1.0)
- if c not in self.RHS:
- self.RHS[c] = c
- self.rhs_counts[c] = count
- else:
- self.rhs_counts[c] += count
- self.count += count
-
-[docs] def weights(self):
- if self.w is None:
- self.w = np.array([self.rhs_counts[c] / self.count for c in self.RHS.keys()])
- return self.w
-
- def __str__(self):
- tmp = self.LHS + " -> "
- tmp2 = ""
- for c in sorted(self.RHS.keys()):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + c + "(" + str(round(self.rhs_counts[c] / self.count, 3)) + ")"
- return tmp + tmp2
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class ImprovedWeightedFTS(fts.FTS):
- """First Order Improved Weighted Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(ImprovedWeightedFTS, self).__init__(order=1, name="IWFTS", **kwargs)
- self.name = "Improved Weighted FTS"
- self.detail = "Ismail & Efendi"
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
- if flr.LHS in self.flrgs:
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
- else:
- self.flrgs[flr.LHS] = ImprovedWeightedFLRG(flr.LHS)
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
-
-[docs] def train(self, ndata, **kwargs):
-
- tmpdata = self.partitioner.fuzzyfy(ndata, method='maximum', mode='sets')
- flrs = FLR.generate_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- explain = kwargs.get('explain', False)
-
- if self.partitioner is not None:
- ordered_sets = self.partitioner.ordered_sets
- else:
- ordered_sets = FuzzySet.set_ordered(self.sets)
-
- ndata = np.array(ndata)
-
- l = len(ndata) if not explain else 1
-
- ret = []
-
- for k in np.arange(0, l):
-
- actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
-
- if explain:
- print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))
-
- if actual.name not in self.flrgs:
- ret.append(actual.centroid)
-
- if explain:
- print("Rules:\n\n {} -> {} (Naïve)\t Midpoint: {} \n\n".format(actual.name, actual.name,actual.centroid))
-
- else:
- flrg = self.flrgs[actual.name]
- mp = flrg.get_midpoints(self.sets)
-
- final = mp.dot(flrg.weights())
-
- ret.append(final)
-
- if explain:
- print("Rules:\n\n {} \n\n ".format(str(flrg)))
- print("Midpoints: \n\n {}\n\n".format(mp))
-
- print("Deffuzyfied value: {} \n".format(final))
-
- return ret
-
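A quick sketch of the first order model above (Enrollments and 10 partitions are illustrative):

from pyFTS.data import Enrollments
from pyFTS.partitioners import Grid
from pyFTS.models import ismailefendi

data = Enrollments.get_data()
part = Grid.GridPartitioner(data=data, npart=10)

model = ismailefendi.ImprovedWeightedFTS(partitioner=part)
model.fit(data)

print(model)                     # weighted first order rules
forecasts = model.predict(data)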
Source code for pyFTS.models.multivariate.FLR
-
-
-[docs]class FLR(object):
- """Multivariate Fuzzy Logical Relationship"""
-
- def __init__(self):
- """
- Creates an empty Multivariate Fuzzy Logical Relationship, where the LHS is a dict
- with one fuzzy set per variable and the RHS is the target variable fuzzy set
- """
- self.LHS = {}
- self.RHS = None
-
-
-
-
-
- def __str__(self):
- return "{} -> {}".format([self.LHS[k] for k in self.LHS.keys()], self.RHS)
-
-
-
-
Source code for pyFTS.models.multivariate.cmvfts
-
-import numpy as np
-import pandas as pd
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-from pyFTS.models import hofts
-from pyFTS.models.multivariate import mvfts, grid, common
-from types import LambdaType
-
-
-[docs]class ClusteredMVFTS(mvfts.MVFTS):
- """
- Meta model for high order, clustered multivariate FTS
- """
- def __init__(self, **kwargs):
- super(ClusteredMVFTS, self).__init__(**kwargs)
-
- self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
- """The FTS method to be called when a new model is build"""
- self.fts_params = kwargs.get('fts_params', {})
- """The FTS method specific parameters"""
- self.model = None
- """The most recent trained model"""
- self.knn = kwargs.get('knn', 2)
-
- self.is_high_order = True
-
- self.is_clustered = True
-
- self.order = kwargs.get("order", 2)
- self.lags = kwargs.get("lags", None)
- self.alpha_cut = kwargs.get('alpha_cut', 0.0)
-
- self.shortname = "ClusteredMVFTS"
- self.name = "Clustered Multivariate FTS"
-
- self.pre_fuzzyfy = kwargs.get('pre_fuzzyfy', True)
-
-[docs] def fuzzyfy(self,data):
- ndata = []
- for index, row in data.iterrows():
- data_point = self.format_data(row)
- ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets'))
-
- return ndata
-
-[docs] def train(self, data, **kwargs):
-
- self.fts_params['order'] = self.order
-
- self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
-
- ndata = self.check_data(data)
-
- self.model.train(ndata, fuzzyfied=self.pre_fuzzyfy)
-
- self.partitioner.prune()
-
-[docs] def check_data(self, data):
- if self.pre_fuzzyfy:
- ndata = self.fuzzyfy(data)
- else:
- ndata = [self.format_data(k) for k in data.to_dict('records')]
-
- return ndata
-
-[docs] def forecast(self, ndata, **kwargs):
-
- ndata = self.check_data(ndata)
-
- return self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
-
-[docs] def forecast_multivariate(self, data, **kwargs):
-
- ndata = self.check_data(data)
-
- generators = kwargs.get('generators', {})
-
- already_processed_cols = []
-
- ret = {}
-
- ret[self.target_variable.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
-
- for var in self.explanatory_variables:
- if var.data_label not in already_processed_cols:
- if var.data_label in generators:
- if isinstance(generators[var.data_label], LambdaType):
- fx = generators[var.data_label]
- if len(data[var.data_label].values) > self.order:
- ret[var.data_label] = [fx(k) for k in data[var.data_label].values[self.order:]]
- else:
- ret[var.data_label] = [fx(data[var.data_label].values[-1])]
- elif isinstance(generators[var.data_label], fts.FTS):
- model = generators[var.data_label]
- if not model.is_multivariate:
- ret[var.data_label] = model.forecast(data[var.data_label].values)
- else:
- ret[var.data_label] = model.forecast(data)
- elif self.target_variable.name != var.name:
- self.target_variable = var
- self.partitioner.change_target_variable(var)
- self.model.partitioner = self.partitioner
- self.model.reset_calculated_values()
- ret[var.data_label] = self.model.forecast(ndata, fuzzyfied=self.pre_fuzzyfy, **kwargs)
-
- already_processed_cols.append(var.data_label)
-
- return pd.DataFrame(ret, columns=ret.keys())
-
-[docs] def forecast_ahead_multivariate(self, data, steps, **kwargs):
-
- ndata = self.apply_transformations(data)
-
- ret = ndata.iloc[:self.order]
-
- for k in np.arange(0, steps):
- sample = ret.iloc[k:self.order+k]
- tmp = self.forecast_multivariate(sample, **kwargs)
- ret = ret.append(tmp, ignore_index=True)
-
- return ret
-
- def __str__(self):
- """String representation of the model"""
- return str(self.model)
-
- def __len__(self):
- """
- The length (number of rules) of the model
-
- :return: number of rules
- """
- return len(self.model)
-
-
Source code for pyFTS.models.multivariate.common
-import numpy as np
-import pandas as pd
-from pyFTS.common import FuzzySet, Composite
-
-
-[docs]class MultivariateFuzzySet(Composite.FuzzySet):
- """
- Multivariate Composite Fuzzy Set
- """
- def __init__(self, **kwargs):
- """
- Create an empty composite fuzzy set
- :param name: fuzzy set name
- """
- super(MultivariateFuzzySet, self).__init__("")
- self.sets = {}
- self.target_variable = kwargs.get('target_variable',None)
-
-[docs] def append_set(self, variable, set):
- """
- Appends a new fuzzy set from a new variable
-
- :param variable: a multivariate.variable instance
- :param set: a common.FuzzySet instance
- """
- self.sets[variable] = set
-
- if variable == self.target_variable.name:
- self.centroid = set.centroid
-
- self.name += set.name
-
-[docs] def set_target_variable(self, variable):
- self.target_variable = variable
- self.centroid = self.sets[variable.name].centroid
-
-[docs] def membership(self, x):
- mv = []
- for var in self.sets.keys():
- data = x[var]
- mv.append(self.sets[var].membership(data))
- return np.nanmin(mv)
-
-
-[docs]def fuzzyfy_instance(data_point, var, tuples=True):
- fsets = var.partitioner.fuzzyfy(data_point, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
- if tuples:
- return [(var.name, fs) for fs in fsets]
- else:
- return fsets
-
-
-[docs]def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
- alpha_cut = kwargs.get('alpha_cut', 0.0)
- mode = kwargs.get('mode', 'sets')
- fsets = []
- for fset in cluster.search(data_point, type='name'):
- if cluster.sets[fset].membership(data_point) > alpha_cut:
- if mode == 'sets':
- fsets.append(fset)
- elif mode =='both':
- fsets.append( (fset, cluster.sets[fset].membership(data_point)) )
- return fsets
-
-
-
-
Source code for pyFTS.models.multivariate.flrg
-
-import numpy as np
-from pyFTS.common import flrg as flg
-
-
-[docs]class FLRG(flg.FLRG):
- """
- Multivariate Fuzzy Logical Rule Group
- """
-
- def __init__(self, **kwargs):
- super(FLRG,self).__init__(0,**kwargs)
- self.order = kwargs.get('order', 1)
- self.LHS = kwargs.get('lhs', {})
- self.RHS = set()
-
-[docs] def set_lhs(self, var, fset):
- if self.order == 1:
- self.LHS[var] = fset
- else:
- if var not in self.LHS:
- self.LHS[var] = []
- self.LHS[var].append(fset)
-
-
-
-[docs] def get_membership(self, data, variables):
- mvs = []
- for var in variables:
- s = self.LHS[var.name]
- mvs.append(var.partitioner.sets[s].membership(data[var.name]))
-
- return np.nanmin(mvs)
-
-[docs] def get_lower(self, sets):
- if self.lower is None:
- self.lower = min([sets[rhs].lower for rhs in self.RHS])
-
- return self.lower
-
-[docs] def get_upper(self, sets):
- if self.upper is None:
- self.upper = max([sets[rhs].upper for rhs in self.RHS])
-
- return self.upper
-
- def __str__(self):
- _str = ""
- for k in self.RHS:
- _str += "," if len(_str) > 0 else ""
- _str += k
-
- return self.get_key() + " -> " + _str
-
Source code for pyFTS.models.multivariate.granular
-from pyFTS.models.multivariate import cmvfts, grid
-from pyFTS.models import hofts
-
-
-[docs]class GranularWMVFTS(cmvfts.ClusteredMVFTS):
- """
- Granular multivariate weighted high order FTS
- """
-
- def __init__(self, **kwargs):
- super(GranularWMVFTS, self).__init__(**kwargs)
-
- self.fts_method = hofts.WeightedHighOrderFTS
- self.model = None
- """The most recent trained model"""
- self.knn = kwargs.get('knn', 2)
- self.order = kwargs.get("order", 2)
- self.shortname = "GranularWMVFTS"
- self.name = "Granular Weighted Multivariate FTS"
-
-[docs] def train(self, data, **kwargs):
- self.partitioner = grid.IncrementalGridCluster(
- explanatory_variables=self.explanatory_variables,
- target_variable=self.target_variable,
- neighbors=self.knn)
- super(GranularWMVFTS, self).train(data,**kwargs)
-
-
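A sketch of the granular model above on a synthetic two-variable frame (column names, sizes and the sine/cosine signals are purely illustrative):

import numpy as np
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import granular, variable

t = np.arange(300)
df = pd.DataFrame({'x': np.sin(t * 0.1), 'y': np.cos(t * 0.1)})

vx = variable.Variable("X", data_label="x", partitioner=Grid.GridPartitioner, npart=15, data=df)
vy = variable.Variable("Y", data_label="y", partitioner=Grid.GridPartitioner, npart=15, data=df)

model = granular.GranularWMVFTS(explanatory_variables=[vx, vy], target_variable=vy,
                                order=2, knn=3)
model.fit(df)
forecasts = model.predict(df)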
Source code for pyFTS.models.multivariate.grid
-from pyFTS.models.multivariate import partitioner
-from pyFTS.models.multivariate.common import MultivariateFuzzySet, fuzzyfy_instance_clustered
-from itertools import product
-from scipy.spatial import KDTree
-import numpy as np
-import pandas as pd
-
-
-[docs]class GridCluster(partitioner.MultivariatePartitioner):
- """
- A Cartesian product of the fuzzy sets of all variables
- """
-
- def __init__(self, **kwargs):
- super(GridCluster, self).__init__(**kwargs)
- self.name="GridCluster"
- self.build(None)
-
-[docs] def build(self, data):
-
- fsets = [[x for x in k.partitioner.sets.values()]
- for k in self.explanatory_variables]
- c = 0
- for k in product(*fsets):
- mvfset = MultivariateFuzzySet(target_variable=self.target_variable)
- for fset in k:
- mvfset.append_set(fset.variable, fset)
-
- self.sets[mvfset.name] = mvfset
- c += 1
-
- self.build_index()
-
-
-[docs]class IncrementalGridCluster(partitioner.MultivariatePartitioner):
- """
- Create combinations of fuzzy sets of the variables on demand, incrementally increasing the
- multivariate fuzzy set base.
- """
- def __init__(self, **kwargs):
- super(IncrementalGridCluster, self).__init__(**kwargs)
- self.name="IncrementalGridCluster"
- self.build(None)
-
-[docs] def fuzzyfy(self, data, **kwargs):
-
- if isinstance(data, pd.DataFrame):
- ret = []
- for index, inst in data.iterrows():
- mv = self.fuzzyfy(inst, **kwargs)
- ret.append(mv)
- return ret
-
- if self.kdtree is not None:
- fsets = self.search(data, type='name')
- else:
- fsets = self.incremental_search(data, type='name')
-
- mode = kwargs.get('mode', 'sets')
- if mode == 'sets':
- return fsets
- elif mode == 'vector':
- raise NotImplementedError()
- elif mode == 'both':
- ret = []
- for key in fsets:
- mvfset = self.sets[key]
- ret.append((key, mvfset.membership(data)))
- return ret
-
-[docs] def incremental_search(self, data, **kwargs):
- alpha_cut = kwargs.get('alpha_cut', 0.)
- mode = kwargs.get('mode', 'sets')
-
- fsets = {}
- ret = []
- for var in self.explanatory_variables:
- ac = alpha_cut if alpha_cut > 0. else var.alpha_cut
- fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets', alpha_cut=ac)
-
- fset = [val for key, val in fsets.items()]
-
- for p in product(*fset):
- key = ''.join(p)
- if key not in self.sets:
- mvfset = MultivariateFuzzySet(target_variable=self.target_variable)
- for ct, fs in enumerate(p):
- mvfset.append_set(self.explanatory_variables[ct].name,
- self.explanatory_variables[ct].partitioner[fs])
- mvfset.name = key
- self.sets[key] = mvfset
- ret.append(key)
-
-
- return ret
-
-
-
-
Source code for pyFTS.models.multivariate.mvfts
-from pyFTS.common import fts, FuzzySet, FLR, Membership
-from pyFTS.partitioners import Grid
-from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
-from itertools import product
-from types import LambdaType
-
-import numpy as np
-import pandas as pd
-
-
-[docs]def product_dict(**kwargs):
- """
- Code by Seth Johnson
- :param kwargs:
- :return:
- """
- keys = kwargs.keys()
- vals = kwargs.values()
- for instance in product(*vals):
- yield dict(zip(keys, instance))
-
-
-[docs]class MVFTS(fts.FTS):
- """
- Multivariate extension of Chen's ConventionalFTS method
- """
- def __init__(self, **kwargs):
- super(MVFTS, self).__init__(**kwargs)
- self.explanatory_variables = kwargs.get('explanatory_variables',[])
- self.target_variable = kwargs.get('target_variable',None)
- self.flrgs = {}
- self.is_multivariate = True
- self.shortname = "MVFTS"
- self.name = "Multivariate FTS"
-
-[docs] def append_variable(self, var):
- """
- Append a new explanatory variable to the model
-
- :param var: variable object
- :return:
- """
- self.explanatory_variables.append(var)
-
-[docs] def format_data(self, data):
- ndata = {}
- for var in self.explanatory_variables:
- ndata[var.name] = var.partitioner.extractor(data[var.data_label])
-
- return ndata
-
-[docs] def apply_transformations(self, data, params=None, updateUoD=False, **kwargs):
- ndata = data.copy(deep=True)
- for var in self.explanatory_variables:
- try:
- values = ndata[var.data_label].values #if isinstance(ndata, pd.DataFrame) else ndata[var.data_label]
- if self.uod_clip and var.partitioner.type == 'common':
- ndata[var.data_label] = np.clip(values,
- var.partitioner.min, var.partitioner.max)
-
- ndata[var.data_label] = var.apply_transformations(values)
- except:
- pass
-
- return ndata
-
-[docs] def generate_lhs_flrs(self, data):
- flrs = []
- lags = {}
- for vc, var in enumerate(self.explanatory_variables):
- data_point = data[var.name]
- lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)
-
- for path in product_dict(**lags):
- flr = MVFLR.FLR()
-
- for var, fset in path.items():
- flr.set_lhs(var, fset)
-
- if len(flr.LHS.keys()) == len(self.explanatory_variables):
- flrs.append(flr)
- else:
- print(flr)
-
- return flrs
-
-[docs] def generate_flrs(self, data):
- flrs = []
- for ct in range(1, len(data.index)):
- ix = data.index[ct-1]
- data_point = self.format_data( data.loc[ix] )
-
- tmp_flrs = self.generate_lhs_flrs(data_point)
-
- target_ix = data.index[ct]
- target_point = data[self.target_variable.data_label][target_ix]
- target = common.fuzzyfy_instance(target_point, self.target_variable)
-
- for flr in tmp_flrs:
- for v, s in target:
- flr.set_rhs(s)
- flrs.append(flr)
-
- return flrs
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
- flrg = mvflrg.FLRG(lhs=flr.LHS)
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg
-
- self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
-
-
-[docs] def train(self, data, **kwargs):
-
- ndata = self.apply_transformations(data)
-
- flrs = self.generate_flrs(ndata)
- self.generate_flrg(flrs)
-
-[docs] def forecast(self, data, **kwargs):
- ret = []
- ndata = self.apply_transformations(data)
- c = 0
- for index, row in ndata.iterrows() if isinstance(ndata, pd.DataFrame) else enumerate(ndata):
- data_point = self.format_data(row)
- flrs = self.generate_lhs_flrs(data_point)
- mvs = []
- mps = []
- for flr in flrs:
- flrg = mvflrg.FLRG(lhs=flr.LHS)
- if flrg.get_key() not in self.flrgs:
- #Naïve approach is applied when no rules were found
- if self.target_variable.name in flrg.LHS:
- fs = flrg.LHS[self.target_variable.name]
- fset = self.target_variable.partitioner.sets[fs]
- mp = fset.centroid
- mv = fset.membership(data_point[self.target_variable.name])
- mvs.append(mv)
- mps.append(mp)
- else:
- mvs.append(0.)
- mps.append(0.)
- else:
- _flrg = self.flrgs[flrg.get_key()]
- mvs.append(_flrg.get_membership(data_point, self.explanatory_variables))
- mps.append(_flrg.get_midpoint(self.target_variable.partitioner.sets))
-
- mv = np.array(mvs)
- mp = np.array(mps)
-
- ret.append(np.dot(mv,mp.T)/np.sum(mv))
-
- ret = self.target_variable.apply_inverse_transformations(ret,
- params=data[self.target_variable.data_label].values)
- return ret
-
-[docs] def forecast_ahead(self, data, steps, **kwargs):
- generators = kwargs.get('generators',None)
-
- if generators is None:
- raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' +
- ' are the dataframe column names (except the target_variable) and the values are ' +
- 'lambda functions that accept one value (the actual value of the variable) '
- ' and return the next value or trained FTS models that accept the actual values and '
- 'forecast new ones.')
-
- ndata = self.apply_transformations(data)
-
- ret = []
- for k in np.arange(0, steps):
- ix = ndata.index[-self.max_lag:]
- sample = ndata.loc[ix]
- tmp = self.forecast(sample, **kwargs)
-
- if isinstance(tmp, (list, np.ndarray)):
- tmp = tmp[-1]
-
- ret.append(tmp)
-
- new_data_point = {}
-
- for data_label in generators.keys():
- if data_label != self.target_variable.data_label:
- if isinstance(generators[data_label], LambdaType):
- last_data_point = ndata.loc[sample.index[-1]]
- new_data_point[data_label] = generators[data_label](last_data_point[data_label])
- elif isinstance(generators[data_label], fts.FTS):
- model = generators[data_label]
- last_data_point = ndata.loc[[sample.index[-model.order]]]
- if not model.is_multivariate:
- last_data_point = last_data_point[data_label].values
-
- new_data_point[data_label] = model.forecast(last_data_point)[0]
-
- new_data_point[self.target_variable.data_label] = tmp
-
- ndata = ndata.append(new_data_point, ignore_index=True)
-
- return ret
-
-[docs] def forecast_interval(self, data, **kwargs):
- ret = []
- ndata = self.apply_transformations(data)
- c = 0
- for index, row in ndata.iterrows() if isinstance(ndata, pd.DataFrame) else enumerate(ndata):
- data_point = self.format_data(row)
- flrs = self.generate_lhs_flrs(data_point)
- mvs = []
- ups = []
- los = []
- for flr in flrs:
- flrg = mvflrg.FLRG(lhs=flr.LHS)
- if flrg.get_key() not in self.flrgs:
- #Naïve approach is applied when no rules were found
- if self.target_variable.name in flrg.LHS:
- fs = flrg.LHS[self.target_variable.name]
- fset = self.target_variable.partitioner.sets[fs]
- up = fset.upper
- lo = fset.lower
- mv = fset.membership(data_point[self.target_variable.name])
- mvs.append(mv)
- ups.append(up)
- los.append(lo)
- else:
- mvs.append(0.)
- ups.append(0.)
- los.append(0.)
- else:
- _flrg = self.flrgs[flrg.get_key()]
- mvs.append(_flrg.get_membership(data_point, self.explanatory_variables))
- ups.append(_flrg.get_upper(self.target_variable.partitioner.sets))
- los.append(_flrg.get_lower(self.target_variable.partitioner.sets))
-
- mv = np.array(mvs)
- up = np.dot(mv, np.array(ups).T) / np.nansum(mv)
- lo = np.dot(mv, np.array(los).T) / np.nansum(mv)
-
- ret.append([lo, up])
-
- ret = self.target_variable.apply_inverse_transformations(ret,
- params=data[self.target_variable.data_label].values)
- return ret
-
-[docs] def clone_parameters(self, model):
- super(MVFTS, self).clone_parameters(model)
-
- self.explanatory_variables = model.explanatory_variables
- self.target_variable = model.target_variable
-
- def __str__(self):
- _str = self.name + ":\n"
- for k in self.flrgs.keys():
- _str += str(self.flrgs[k]) + "\n"
-
- return _str
-
-
-
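The generators argument required by forecast_ahead above maps every non-target column to either a lambda or a fitted FTS model. A self-contained sketch on synthetic data (it assumes a pandas version where DataFrame.append still exists, as the code above does; all names and values are illustrative):

import numpy as np
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import variable, mvfts

t = np.arange(400)
df = pd.DataFrame({'x': np.sin(t * 0.1), 'y': np.cos(t * 0.1)})

vx = variable.Variable("X", data_label="x", partitioner=Grid.GridPartitioner, npart=10, data=df)
vy = variable.Variable("Y", data_label="y", partitioner=Grid.GridPartitioner, npart=10, data=df)

model = mvfts.MVFTS(explanatory_variables=[vx, vy], target_variable=vy)
model.fit(df)

# one generator per non-target column; here a lambda that advances 'x' by a fixed step
forecasts = model.forecast_ahead(df, steps=5,
                                 generators={'x': lambda value: value + 0.1})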
Source code for pyFTS.models.multivariate.variable
-import pandas as pd
-from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
-from pyFTS.partitioners import Grid
-from pyFTS.models.multivariate import FLR as MVFLR
-
-
-[docs]class Variable:
- """
- A variable of a fuzzy time series multivariate model. Each variable contains its own
- transformations and partitioners.
- """
- def __init__(self, name, **kwargs):
- """
-
- :param name:
- :param \**kwargs: See below
-
- :Keyword Arguments:
- * *alias* -- Alternative name for the variable
- """
- self.name = name
- """A string with the name of the variable"""
- self.alias = kwargs.get('alias', self.name)
- """A string with the alias of the variable"""
- self.data_label = kwargs.get('data_label', self.name)
- """A string with the column name on DataFrame"""
- self.type = kwargs.get('type', 'common')
- self.data_type = kwargs.get('data_type', None)
- """The type of the data column on Pandas Dataframe"""
- self.mask = kwargs.get('mask', None)
- """The mask for format the data column on Pandas Dataframe"""
- self.transformation = kwargs.get('transformation', None)
- """Pre processing transformation for the variable"""
- self.transformation_params = kwargs.get('transformation_params', None)
- self.partitioner = None
- """UoD partitioner for the variable data"""
- self.alpha_cut = kwargs.get('alpha_cut', 0.0)
- """Minimal membership value to be considered on fuzzyfication process"""
-
-
- if kwargs.get('data', None) is not None:
- self.build(**kwargs)
-
-[docs] def build(self, **kwargs):
- """
-
- :param kwargs:
- :return:
- """
- fs = kwargs.get('partitioner', Grid.GridPartitioner)
- mf = kwargs.get('func', Membership.trimf)
- np = kwargs.get('npart', 10)
- data = kwargs.get('data', None)
- kw = kwargs.get('partitioner_specific', {})
- self.partitioner = fs(data=data[self.data_label].values, npart=np, func=mf,
- transformation=self.transformation, prefix=self.alias,
- variable=self.name, **kw)
-
- self.partitioner.name = self.name + " " + self.partitioner.name
-
-[docs] def apply_transformations(self, data, **kwargs):
-
- if kwargs.get('params', None) is not None:
- self.transformation_params = kwargs.get('params', None)
-
- if self.transformation is not None:
- return self.transformation.apply(data, self.transformation_params)
-
- return data
-
-[docs] def apply_inverse_transformations(self, data, **kwargs):
-
- if kwargs.get('params', None) is not None:
- self.transformation_params = kwargs.get('params', None)
-
- if self.transformation is not None:
- return self.transformation.inverse(data, self.transformation_params)
-
- return data
-
- def __str__(self):
- return self.name
-
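A sketch of building a Variable with its own transformation and partitioner (the synthetic 'load' column and all sizes are illustrative):

import numpy as np
import pandas as pd
from pyFTS.common import Transformations
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import variable

df = pd.DataFrame({'load': 100 + 10 * np.sin(np.arange(500) * 0.1)})

diff = Transformations.Differential(1)

vload = variable.Variable("Load", data_label="load",
                          partitioner=Grid.GridPartitioner, npart=20,
                          transformation=diff, data=df)

print(vload.partitioner)   # the grid built over the differenced 'load' values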
Source code for pyFTS.models.multivariate.wmvfts
-from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
-from pyFTS.partitioners import Grid
-from pyFTS.models.multivariate import mvfts, FLR as MVFLR, common, flrg as mvflrg
-
-import numpy as np
-import pandas as pd
-
-
-[docs]class WeightedFLRG(mvflrg.FLRG):
- """
- Weighted Multivariate Fuzzy Logical Rule Group
- """
-
- def __init__(self, **kwargs):
- super(WeightedFLRG, self).__init__(**kwargs)
- self.order = kwargs.get('order', 1)
- self.LHS = kwargs.get('lhs', {})
- self.RHS = {}
- self.count = 0.0
- self.w = None
-
-[docs] def append_rhs(self, fset, **kwargs):
- count = kwargs.get('count', 1.0)
- if fset not in self.RHS:
- self.RHS[fset] = count
- else:
- self.RHS[fset] += count
- self.count += count
-
-[docs] def weights(self):
- if self.w is None:
- self.w = np.array([self.RHS[c] / self.count for c in self.RHS.keys()])
- return self.w
-
-[docs] def get_midpoint(self, sets):
- if self.midpoint is None:
- mp = np.array([sets[c].centroid for c in self.RHS.keys()])
- self.midpoint = mp.dot(self.weights())
-
- return self.midpoint
-
-[docs] def get_lower(self, sets):
- if self.lower is None:
- lw = np.array([sets[s].lower for s in self.RHS.keys()])
- self.lower = lw.dot(self.weights())
- return self.lower
-
-[docs] def get_upper(self, sets):
- if self.upper is None:
- up = np.array([sets[s].upper for s in self.RHS.keys()])
- self.upper = up.dot(self.weights())
- return self.upper
-
-
- def __str__(self):
- _str = ""
- for k in self.RHS.keys():
- _str += ", " if len(_str) > 0 else ""
- _str += k + " (" + str(round( self.RHS[k] / self.count, 3)) + ")"
-
- return self.get_key() + " -> " + _str
-
-
-[docs]class WeightedMVFTS(mvfts.MVFTS):
- """
- Weighted Multivariate FTS
- """
- def __init__(self, **kwargs):
- super(WeightedMVFTS, self).__init__(order=1, **kwargs)
- self.shortname = "WeightedMVFTS"
- self.name = "Weighted Multivariate FTS"
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
- flrg = WeightedFLRG(lhs=flr.LHS)
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg
-
- self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
-
Source code for pyFTS.models.nonstationary.common
-"""
-Non Stationary Fuzzy Sets
-
-GARIBALDI, Jonathan M.; JAROSZEWSKI, Marcin; MUSIKASUWAN, Salang. Nonstationary fuzzy sets.
-IEEE Transactions on Fuzzy Systems, v. 16, n. 4, p. 1072-1086, 2008.
-"""
-
-import numpy as np
-from pyFTS import *
-from pyFTS.common import FuzzySet as FS, Membership, FLR
-from pyFTS.partitioners import partitioner
-from pyFTS.models.nonstationary import perturbation
-
-
-[docs]class FuzzySet(FS.FuzzySet):
- """
- Non Stationary Fuzzy Sets
- """
-
- def __init__(self, name, mf, parameters, **kwargs):
- """
- Constructor
- """
- super(FuzzySet, self).__init__(name=name, mf=mf, parameters=parameters, centroid=None, alpha=1.0, **kwargs)
-
- self.location = kwargs.get("location", None)
- """Pertubation function that affects the location of the membership function"""
- self.location_params = kwargs.get("location_params", None)
- """Parameters for location pertubation function"""
- self.location_roots = kwargs.get("location_roots", 0)
- self.width = kwargs.get("width", None)
- """Pertubation function that affects the width of the membership function"""
- self.width_params = kwargs.get("width_params", None)
- """Parameters for width pertubation function"""
- self.width_roots = kwargs.get("width_roots", 0)
- self.noise = kwargs.get("noise", None)
- """Pertubation function that adds noise on the membership function"""
- self.noise_params = kwargs.get("noise_params", None)
- """Parameters for noise pertubation function"""
- self.perturbated_parameters = {}
- self.type = 'nonstationary'
-
- if self.location is not None and not isinstance(self.location, (list, set)):
- self.location = [self.location]
- self.location_params = [self.location_params]
- self.location_roots = [self.location_roots]
-
- if self.width is not None and not isinstance(self.width, (list, set)):
- self.width = [self.width]
- self.width_params = [self.width_params]
- self.width_roots = [self.width_roots]
-
-[docs] def perform_location(self, t, param):
- if self.location is None:
- inc = t
- else:
- l = len(self.location)
-
- inc = sum([self.location[k](t + self.location_roots[k], self.location_params[k]) for k in np.arange(0, l)])
-
- if self.mf == Membership.gaussmf:
- # changes only the mean parameter
- return [param[0] + inc, param[1]]
- elif self.mf == Membership.sigmf:
- # changes only the midpoint parameter
- return [param[0], param[1] + inc]
- elif self.mf == Membership.bellmf:
- return [param[0], param[1], param[2] + inc]
- else:
- # translate all parameters
- return [k + inc for k in param]
-
-[docs] def perform_width(self, t, param):
- if self.width is None:
- inc = t
- else:
-
- l = len(self.width)
-
- inc = sum([self.width[k](t + self.width_roots[k], self.width_params[k]) for k in np.arange(0, l)])
-
- if self.mf == Membership.gaussmf:
- # changes only the variance parameter
- return [param[0], param[1] + inc]
- elif self.mf == Membership.sigmf:
- # changes only the smooth parameter
- return [param[0] + inc, param[1]]
- elif self.mf == Membership.trimf:
- tmp = inc / 2
- return [param[0] - tmp, param[1], param[2] + tmp]
- elif self.mf == Membership.trapmf:
- l = (param[3] - param[0])
- rab = (param[1] - param[0]) / l
- rcd = (param[3] - param[2]) / l
- return [param[0] - inc, param[1] - inc * rab, param[2] + inc * rcd, param[3] + inc]
- else:
- return param
-
-[docs] def membership(self, x, t):
- """
- Calculate the membership value of a given input
-
- :param x: input value
- :param t: time displacement or perturbation parameters
- :return: membership value of x at this fuzzy set
- """
-
- self.perturbate_parameters(t)
-
- tmp = self.mf(x, self.perturbated_parameters[str(t)])
-
- if self.noise is not None:
- tmp += self.noise(t, self.noise_params)
-
- return tmp
-
-[docs] def perturbate_parameters(self, t):
- if str(t) not in self.perturbated_parameters:
- param = self.parameters
- if isinstance(t, (list, set)):
- param = self.perform_location(t[0], param)
- param = self.perform_width(t[1], param)
- else:
- param = self.perform_location(t, param)
- param = self.perform_width(t, param)
- self.perturbated_parameters[str(t)] = param
-
-[docs] def get_midpoint(self, t):
-
- self.perturbate_parameters(t)
- param = self.perturbated_parameters[str(t)]
-
- if self.mf == Membership.gaussmf:
- return param[0]
- elif self.mf == Membership.sigmf:
- return param[1]
- elif self.mf == Membership.trimf:
- return param[1]
- elif self.mf == Membership.trapmf:
- return (param[2] - param[1]) / 2
- else:
- return param
-
-[docs] def get_lower(self, t):
-
- self.perturbate_parameters(t)
- param = self.perturbated_parameters[str(t)]
-
- if self.mf == Membership.gaussmf:
- return param[0] - 3*param[1]
- elif self.mf == Membership.sigmf:
- return param[0] - param[1]
- elif self.mf == Membership.trimf:
- return param[0]
- elif self.mf == Membership.trapmf:
- return param[0]
- else:
- return param
-
-[docs] def get_upper(self, t):
-
- self.perturbate_parameters(t)
- param = self.perturbated_parameters[str(t)]
-
- if self.mf == Membership.gaussmf:
- return param[0] + 3*param[1]
- elif self.mf == Membership.sigmf:
- return param[0] + param[1]
- elif self.mf == Membership.trimf:
- return param[2]
- elif self.mf == Membership.trapmf:
- return param[3]
- else:
- return param
-
- def __str__(self):
- tmp = ""
- if self.location is not None:
- tmp += "Location: "
- for ct, f in enumerate(self.location):
- tmp += str(f.__name__) + "(" + str(["{0:.2f}".format(p) for p in self.location_params[ct]]) + ") "
- if self.width is not None:
- tmp += "Width: "
- for ct, f in enumerate(self.width):
- tmp += str(f.__name__) + "(" + str(["{0:.2f}".format(p) for p in self.width_params[ct]]) + ") "
- tmp = "(" + str(["{0:.2f}".format(p) for p in self.parameters]) + ") " + tmp
- return self.name + ": " + str(self.mf.__name__) + tmp
-
-
-[docs]def fuzzify(inst, t, fuzzySets):
- """
- Calculate the membership values for a data point given nonstationary fuzzy sets
-
- :param inst: data points
- :param t: time displacement of the instance
- :param fuzzySets: list of fuzzy sets
- :return: array of membership values
- """
- ret = []
- if not isinstance(inst, list):
- inst = [inst]
- for t, i in enumerate(inst):
- mv = np.array([fs.membership(i, t) for fs in fuzzySets])
- ret.append(mv)
- return ret
-
-
-[docs]def fuzzySeries(data, fuzzySets, ordered_sets, window_size=1, method='fuzzy', const_t=None):
- fts = []
- for t, i in enumerate(data):
- tdisp = window_index(t, window_size) if const_t is None else const_t
- mv = np.array([fuzzySets[fs].membership(i, tdisp) for fs in ordered_sets])
- if len(mv) == 0:
- sets = [check_bounds(i, fuzzySets, tdisp)]
- else:
- if method == 'fuzzy':
- ix = np.ravel(np.argwhere(mv > 0.0))
- elif method == 'maximum':
- mx = max(mv)
- ix = np.ravel(np.argwhere(mv == mx))
- sets = [fuzzySets[ordered_sets[i]] for i in ix]
- fts.append(sets)
- return fts
-
-
-[docs]def window_index(t, window_size):
- if isinstance(t, (list, set)):
- return t
- return t - (t % window_size)
-
-
-[docs]def check_bounds(data, partitioner, t):
- if data < partitioner.lower_set().get_lower(t):
- return partitioner.lower_set()
- elif data > partitioner.upper_set().get_upper(t):
- return partitioner.upper_set()
-
-
-[docs]def check_bounds_index(data, partitioner, t):
- if data < partitioner.lower_set().get_lower(t):
- return 0
- elif data > partitioner.upper_set().get_upper(t):
- return len(partitioner.sets) -1
-
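A sketch of a nonstationary fuzzy set whose membership function drifts over time. It assumes the linear(x, parameters) helper from pyFTS.models.nonstationary.perturbation; all parameter values are illustrative:

from pyFTS.common import Membership
from pyFTS.models.nonstationary import common, perturbation

# triangular set on [0, 10] whose location drifts by 0.5 per time step
# and whose width grows slowly (0.1 per time step)
fs = common.FuzzySet("A1", Membership.trimf, [0.0, 5.0, 10.0],
                     location=perturbation.linear, location_params=[0.5, 0.0],
                     width=perturbation.linear, width_params=[0.1, 0.0])

print(fs.membership(5.0, 0))    # membership of 5.0 at t = 0 (the original set, value 1.0)
print(fs.membership(5.0, 10))   # same input at t = 10: the set has drifted, so the value drops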
Source code for pyFTS.models.nonstationary.cvfts
-import numpy as np
-from pyFTS.models import hofts
-from pyFTS.models.nonstationary import common,nsfts
-from pyFTS.common import FLR, flrg, tree
-
-
-[docs]class HighOrderNonstationaryFLRG(hofts.HighOrderFTS):
- """Conventional High Order Fuzzy Logical Relationship Group"""
- def __init__(self, order, **kwargs):
- super(HighOrderNonstationaryFLRG, self).__init__(order, **kwargs)
- self.LHS = []
- self.RHS = {}
- self.strlhs = ""
-
-
-
-
-
- def __str__(self):
- tmp = ""
- for c in sorted(self.RHS):
- if len(tmp) > 0:
- tmp = tmp + ","
- tmp = tmp + c
- return self.get_key() + " -> " + tmp
-
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class ConditionalVarianceFTS(hofts.HighOrderFTS):
- def __init__(self, **kwargs):
- super(ConditionalVarianceFTS, self).__init__(**kwargs)
- self.name = "Conditional Variance FTS"
- self.shortname = "CVFTS "
- self.detail = ""
- self.flrgs = {}
- self.is_high_order = False
- if self.partitioner is not None:
- self.append_transformation(self.partitioner.transformation)
-
- self.min_stack = [0,0,0]
- self.max_stack = [0,0,0]
- self.uod_clip = False
- self.order = 1
- self.min_order = 1
- self.max_lag = 1
- self.inputs = []
- self.forecasts = []
- self.residuals = []
- self.variance_residual = 0.
- self.mean_residual = 0.
- self.memory_window = kwargs.get("memory_window",5)
-
-[docs] def train(self, ndata, **kwargs):
-
- tmpdata = common.fuzzySeries(ndata, self.sets,
- self.partitioner.ordered_sets,
- method='fuzzy', const_t=0)
- flrs = FLR.generate_non_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
-
- self.forecasts = self.forecast(ndata, no_update=True)
- self.residuals = np.array(ndata[1:]) - np.array(self.forecasts[:-1])
-
- self.variance_residual = np.var(self.residuals) # np.max(self.residuals
- self.mean_residual = np.mean(self.residuals)
-
- self.residuals = self.residuals[-self.memory_window:].tolist()
- self.forecasts = self.forecasts[-self.memory_window:]
- self.inputs = np.array(ndata[-self.memory_window:]).tolist()
-
-
-[docs] def generate_flrg(self, flrs, **kwargs):
- for flr in flrs:
- if flr.LHS.name in self.flrgs:
- self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
- else:
- self.flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS.name)
- self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
-
-
- def _smooth(self, a):
- return .1 * a[0] + .3 * a[1] + .6 * a[2]
-
-[docs] def perturbation_factors(self, data, **kwargs):
- npart = len(self.partitioner.sets)
- _max = 0
- _min = 0
- if data < self.original_min:
- _min = data - self.original_min if data < 0 else self.original_min - data
- elif data > self.original_max:
- _max = data - self.original_max if data > 0 else self.original_max - data
- self.min_stack.pop(2)
- self.min_stack.insert(0, _min)
- _min = min(self.min_stack)
- self.max_stack.pop(2)
- self.max_stack.insert(0, _max)
- _max = max(self.max_stack)
-
- _range = (_max - _min)/2
-
- translate = np.linspace(_min, _max, npart)
-
- var = np.std(self.residuals)
-
- var = 0 if var < 1 else var
-
- loc = (self.mean_residual + np.mean(self.residuals))
-
- location = [_range + w + loc + k for k in np.linspace(-var,var, npart) for w in translate]
-
- scale = [abs(location[0] - location[2])]
- scale.extend([abs(location[k - 1] - location[k + 1]) for k in np.arange(1, npart)])
- scale.append(abs(location[-1] - location[-3]))
-
- perturb = [[location[k], scale[k]] for k in np.arange(npart)]
-
- return perturb
-
-[docs] def perturbation_factors__old(self, data):
- npart = len(self.partitioner.sets)
- _max = 0
- _min = 0
- if data < self.original_min:
- _min = data - self.original_min if data < 0 else self.original_min - data
- elif data > self.original_max:
- _max = data - self.original_max if data > 0 else self.original_max - data
- self.min_stack.pop(2)
- self.min_stack.insert(0,_min)
- _min = min(self.min_stack)
- self.max_stack.pop(2)
- self.max_stack.insert(0, _max)
- _max = max(self.max_stack)
-
- location = np.linspace(_min, _max, npart)
- scale = [abs(location[0] - location[2])]
- scale.extend([abs(location[k-1] - location[k+1]) for k in np.arange(1, npart)])
- scale.append(abs(location[-1] - location[-3]))
-
- perturb = [[location[k], scale[k]] for k in np.arange(0, npart)]
-
- return perturb
-
- def _fsset_key(self, ix):
- return self.partitioner.ordered_sets[ix]
-
- def _affected_sets(self, sample, perturb):
-
- affected_sets = [[ct, self.sets[self._fsset_key(ct)].membership(sample, perturb[ct])]
- for ct in np.arange(len(self.partitioner.sets))
- if self.sets[self._fsset_key(ct)].membership(sample, perturb[ct]) > 0.0]
-
- if len(affected_sets) == 0:
-
- if sample < self.partitioner.lower_set().get_lower(perturb[0]):
- affected_sets.append([0, 1])
- elif sample > self.partitioner.upper_set().get_upper(perturb[-1]):
- affected_sets.append([len(self.sets) - 1, 1])
-
- return affected_sets
-
-[docs] def forecast(self, ndata, **kwargs):
- l = len(ndata)
-
- ret = []
-
- no_update = kwargs.get("no_update",False)
-
- for k in np.arange(0, l):
-
- sample = ndata[k]
-
- if not no_update:
- perturb = self.perturbation_factors(sample)
- else:
- perturb = [[0, 1] for k in np.arange(len(self.partitioner.sets))]
-
- affected_sets = self._affected_sets(sample, perturb)
-
- numerator = []
- denominator = []
-
- if len(affected_sets) == 1:
- ix = affected_sets[0][0]
- aset = self.partitioner.ordered_sets[ix]
- if aset in self.flrgs:
- numerator.append(self.flrgs[aset].get_midpoint(self.sets, perturb[ix]))
- else:
- fuzzy_set = self.sets[aset]
- numerator.append(fuzzy_set.get_midpoint(perturb[ix]))
- denominator.append(1)
- else:
- for aset in affected_sets:
- ix = aset[0]
- fs = self.partitioner.ordered_sets[ix]
- tdisp = perturb[ix]
- if fs in self.flrgs:
- numerator.append(self.flrgs[fs].get_midpoint(self.sets, tdisp) * aset[1])
- else:
- fuzzy_set = self.sets[fs]
- numerator.append(fuzzy_set.get_midpoint(tdisp) * aset[1])
- denominator.append(aset[1])
-
- if sum(denominator) > 0:
- pto = sum(numerator) /sum(denominator)
- else:
- pto = sum(numerator)
-
- ret.append(pto)
-
- if not no_update:
- self.forecasts.append(pto)
- self.residuals.append(self.inputs[-1] - self.forecasts[-1])
- self.inputs.append(sample)
-
- self.inputs.pop(0)
- self.forecasts.pop(0)
- self.residuals.pop(0)
-
- return ret
-
-
-[docs] def forecast_interval(self, ndata, **kwargs):
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(0, l):
-
- sample = ndata[k]
-
- perturb = self.perturbation_factors(sample)
-
- affected_sets = self._affected_sets(sample, perturb)
-
- upper = []
- lower = []
-
- if len(affected_sets) == 1:
- ix = affected_sets[0][0]
- aset = self.partitioner.ordered_sets[ix]
- if aset in self.flrgs:
- lower.append(self.flrgs[aset].get_lower(perturb[ix]))
- upper.append(self.flrgs[aset].get_upper(perturb[ix]))
- else:
- fuzzy_set = self.sets[aset]
- lower.append(fuzzy_set.get_lower(perturb[ix]))
- upper.append(fuzzy_set.get_upper(perturb[ix]))
- else:
- for aset in affected_sets:
- ix = aset[0]
- fs = self.partitioner.ordered_sets[ix]
- tdisp = perturb[ix]
- if fs in self.flrgs:
- lower.append(self.flrgs[fs].get_lower(tdisp) * aset[1])
- upper.append(self.flrgs[fs].get_upper(tdisp) * aset[1])
- else:
- fuzzy_set = self.sets[fs]
- lower.append(fuzzy_set.get_lower(tdisp) * aset[1])
- upper.append(fuzzy_set.get_upper(tdisp) * aset[1])
-
- itvl = [sum(lower), sum(upper)]
-
- ret.append(itvl)
-
- return ret
-
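A minimal end-to-end sketch of the conditional variance model defined above. The synthetic series, the partition size and the generic fit() front-end (inherited from the pyFTS base FTS class, not shown in this file) are illustrative assumptions:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models.nonstationary import cvfts

# hypothetical non-stationary series (random walk)
data = np.cumsum(np.random.normal(0, 1, 500))

part = Grid.GridPartitioner(data=data[:400], npart=15)
model = cvfts.ConditionalVarianceFTS(partitioner=part)
model.fit(data[:400])

point_forecasts = model.forecast(data[400:])
interval_forecasts = model.forecast_interval(data[400:])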
Source code for pyFTS.models.nonstationary.flrg
-
-from pyFTS.common import flrg
-from pyFTS.models.nonstationary import common
-import numpy as np
-
-
-[docs]class NonStationaryFLRG(flrg.FLRG):
-[docs] def unpack_args(self, *args):
- l = len(args)
- tmp = args
- sets, t, w = None, 0, 1
- if l > 0 and isinstance(tmp[0], dict):
- sets = tmp[0]
- if l > 1 and isinstance(tmp[1], (int, list, tuple)):
- t = tmp[1]
- if l > 2 and isinstance(tmp[2], int):
- w = tmp[2]
-
- return (sets, t, w)
-
-
- def __init__(self, LHS, **kwargs):
- super(NonStationaryFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = set()
-
-[docs] def get_key(self):
- if isinstance(self.LHS, list):
- return str([k for k in self.LHS])
- elif isinstance(self.LHS, dict):
- return str(self.LHS.keys())
- else:
- return self.LHS
-
-[docs] def get_membership(self, data, *args):
- sets, t, window_size = self.unpack_args(*args)
- ret = 0.0
- if isinstance(self.LHS, (list, set)):
- ret = min([sets[self.LHS[ct]].membership(dat, common.window_index(t - (self.order - ct), window_size))
- for ct, dat in enumerate(data)])
- else:
- ret = self.LHS.membership(data, common.window_index(t, window_size))
- return ret
-
-[docs] def get_midpoint(self, *args):
- sets, t, window_size = self.unpack_args(*args)
- if len(self.RHS) > 0:
- if isinstance(self.RHS, (list, set)):
- tmp = [sets[r].get_midpoint(common.window_index(t, window_size)) for r in self.RHS]
- elif isinstance(self.RHS, dict):
- tmp = [sets[r].get_midpoint(common.window_index(t, window_size)) for r in self.RHS.keys()]
- return sum(tmp) / len(tmp)
- else:
- return sets[self.LHS[-1]].get_midpoint(common.window_index(t, window_size))
-
-[docs] def get_lower(self, *args):
- sets, t, window_size = self.unpack_args(*args)
- if len(self.RHS) > 0:
- if isinstance(self.RHS, (list, set)):
- return min([sets[r].get_lower(common.window_index(t, window_size)) for r in self.RHS])
- elif isinstance(self.RHS, dict):
- return min([sets[r].get_lower(common.window_index(t, window_size)) for r in self.RHS.keys()])
- else:
- return sets[self.LHS[-1]].get_lower(common.window_index(t, window_size))
-
-[docs] def get_upper(self, *args):
- sets, t, window_size = self.unpack_args(*args)
- if len(self.RHS) > 0:
- if isinstance(self.RHS, (list, set)):
- return max([sets[r].get_upper(common.window_index(t, window_size)) for r in self.RHS])
- elif isinstance(self.RHS, dict):
- return max([sets[r].get_upper(common.window_index(t, window_size)) for r in self.RHS.keys()])
- else:
- return sets[self.LHS[-1]].get_upper(common.window_index(t, window_size))
-
Source code for pyFTS.models.nonstationary.honsfts
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, tree
-from pyFTS.models import hofts
-from pyFTS.models.nonstationary import common, flrg
-
-
-[docs]class HighOrderNonStationaryFLRG(flrg.NonStationaryFLRG):
- """High Order NonStationary Fuzzy Logical Relationship Group"""
- def __init__(self, order, **kwargs):
- super(HighOrderNonStationaryFLRG, self).__init__(order, **kwargs)
-
- self.LHS = []
- self.RHS = {}
-
-
-
-
-
- def __str__(self):
- tmp = ""
- for c in sorted(self.RHS):
- if len(tmp) > 0:
- tmp = tmp + ","
- tmp = tmp + c
- return self.get_key() + " -> " + tmp
-
-
-[docs]class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
- """High Order NonStationary Fuzzy Time Series"""
- def __init__(self, name, **kwargs):
- super(HighOrderNonStationaryFTS, self).__init__("HONSFTS " + name, **kwargs)
- self.name = "High Order Non Stationary FTS"
- self.detail = ""
- self.flrgs = {}
-
-[docs] def generate_flrg(self, data, **kwargs):
- l = len(data)
- window_size = kwargs.get("window_size", 1)
- for k in np.arange(self.order, l):
- if self.dump: print("FLR: " + str(k))
-
- sample = data[k - self.order: k]
-
- disp = common.window_index(k, window_size)
-
- rhs = [self.sets[key] for key in self.partitioner.ordered_sets
- if self.sets[key].membership(data[k], disp) > 0.0]
-
- if len(rhs) == 0:
- rhs = [common.check_bounds(data[k], self.partitioner, disp)]
-
- lags = {}
-
- for o in np.arange(0, self.order):
- tdisp = common.window_index(k - (self.order - o), window_size)
- lhs = [self.sets[key] for key in self.partitioner.ordered_sets
- if self.sets[key].membership(sample[o], tdisp) > 0.0]
-
- if len(lhs) == 0:
- lhs = [common.check_bounds(sample[o], self.partitioner, tdisp)]
-
- lags[o] = lhs
-
- root = tree.FLRGTreeNode(None)
-
- tree.build_tree_without_order(root, lags, 0)
-
- # Trace the possible paths
- for p in root.paths():
- flrg = HighOrderNonStationaryFLRG(self.order)
- path = list(reversed(list(filter(None.__ne__, p))))
-
- for c, e in enumerate(path, start=0):
- flrg.append_lhs(e)
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg;
-
- for st in rhs:
- self.flrgs[flrg.get_key()].append_rhs(st)
-
- # flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1))
-
-[docs] def train(self, data, **kwargs):
-
- if kwargs.get('order', None) is not None:
- self.order = kwargs.get('order', 1)
-
- if kwargs.get('sets', None) is not None:
- self.sets = kwargs.get('sets', None)
-
- window_size = kwargs.get('parameters', 1)
- self.generate_flrg(data, window_size=window_size)
-
- def _affected_flrgs(self, sample, k, time_displacement, window_size):
- # print("input: " + str(ndata[k]))
-
- affected_flrgs = []
- affected_flrgs_memberships = []
-
- lags = {}
-
- for ct, dat in enumerate(sample):
- tdisp = common.window_index((k + time_displacement) - (self.order - ct), window_size)
-
- sel = [ct for ct, key in enumerate(self.partitioner.ordered_sets)
- if self.sets[key].membership(dat, tdisp) > 0.0]
-
- if len(sel) == 0:
- sel.append(common.check_bounds_index(dat, self.partitioner, tdisp))
-
- lags[ct] = sel
-
- # Build the tree with all possible paths
-
- root = tree.FLRGTreeNode(None)
-
- tree.build_tree_without_order(root, lags, 0)
-
- # Trace the possible paths and build the PFLRG's
-
- for p in root.paths():
- path = list(reversed(list(filter(None.__ne__, p))))
- flrg = HighOrderNonStationaryFLRG(self.order)
-
- for kk in path:
- flrg.append_lhs(self.sets[self.partitioner.ordered_sets[kk]])
-
- affected_flrgs.append(flrg)
- # affected_flrgs_memberships.append_rhs(flrg.get_membership(sample, disp))
-
- # print(flrg.get_key())
-
- # the FLRG is here because of the bounds verification
- mv = []
- for ct, dat in enumerate(sample):
- td = common.window_index((k + time_displacement) - (self.order - ct), window_size)
- tmp = flrg.LHS[ct].membership(dat, td)
-
-
- mv.append(tmp)
- # print(mv)
-
- affected_flrgs_memberships.append(np.prod(mv))
-
- return [affected_flrgs, affected_flrgs_memberships]
-
-[docs] def forecast(self, ndata, **kwargs):
-
- time_displacement = kwargs.get("time_displacement",0)
-
- window_size = kwargs.get("window_size", 1)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(self.order, l+1):
-
- sample = ndata[k - self.order: k]
-
- affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
- time_displacement, window_size)
-
- #print([str(k) for k in affected_flrgs])
- #print(affected_flrgs_memberships)
-
- tmp = []
- tdisp = common.window_index(k + time_displacement, window_size)
- if len(affected_flrgs) == 0:
- tmp.append(common.check_bounds(sample[-1], self.sets, tdisp))
- elif len(affected_flrgs) == 1:
- flrg = affected_flrgs[0]
- if flrg.get_key() in self.flrgs:
- tmp.append(self.flrgs[flrg.get_key()].get_midpoint(tdisp))
- else:
- tmp.append(flrg.LHS[-1].get_midpoint(tdisp))
- else:
- for ct, aset in enumerate(affected_flrgs):
- if aset.get_key() in self.flrgs:
- tmp.append(self.flrgs[aset.get_key()].get_midpoint(tdisp) *
- affected_flrgs_memberships[ct])
- else:
- tmp.append(aset.LHS[-1].get_midpoint(tdisp)*
- affected_flrgs_memberships[ct])
- pto = sum(tmp)
-
- #print(pto)
-
- ret.append(pto)
-
- return ret
-
-[docs] def forecast_interval(self, ndata, **kwargs):
-
- time_displacement = kwargs.get("time_displacement", 0)
-
- window_size = kwargs.get("window_size", 1)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(self.order, l + 1):
-
- sample = ndata[k - self.order: k]
-
- affected_flrgs, affected_flrgs_memberships = self._affected_flrgs(sample, k,
- time_displacement, window_size)
-
- # print([str(k) for k in affected_flrgs])
- # print(affected_flrgs_memberships)
-
- upper = []
- lower = []
-
- tdisp = common.window_index(k + time_displacement, window_size)
- if len(affected_flrgs) == 0:
- aset = common.check_bounds(sample[-1], self.sets, tdisp)
- lower.append(aset.get_lower(tdisp))
- upper.append(aset.get_upper(tdisp))
- elif len(affected_flrgs) == 1:
- _flrg = affected_flrgs[0]
- if _flrg.get_key() in self.flrgs:
- lower.append(self.flrgs[_flrg.get_key()].get_lower(tdisp))
- upper.append(self.flrgs[_flrg.get_key()].get_upper(tdisp))
- else:
- lower.append(_flrg.LHS[-1].get_lower(tdisp))
- upper.append(_flrg.LHS[-1].get_upper(tdisp))
- else:
- for ct, aset in enumerate(affected_flrgs):
- if aset.get_key() in self.flrgs:
- lower.append(self.flrgs[aset.get_key()].get_lower(tdisp) *
- affected_flrgs_memberships[ct])
- upper.append(self.flrgs[aset.get_key()].get_upper(tdisp) *
- affected_flrgs_memberships[ct])
- else:
- lower.append(aset.LHS[-1].get_lower(tdisp) *
- affected_flrgs_memberships[ct])
- upper.append(aset.LHS[-1].get_upper(tdisp) *
- affected_flrgs_memberships[ct])
-
- ret.append([sum(lower), sum(upper)])
-
-
- return ret
-
Source code for pyFTS.models.nonstationary.nsfts
-import numpy as np
-from pyFTS.common import FLR, fts
-from pyFTS.models.nonstationary import common, flrg
-
-
-[docs]class ConventionalNonStationaryFLRG(flrg.NonStationaryFLRG):
- """First Order NonStationary Fuzzy Logical Relationship Group"""
-
- def __init__(self, LHS, **kwargs):
- super(ConventionalNonStationaryFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = set()
-
-
-
-
-
- def __str__(self):
- tmp = self.LHS + " -> "
- tmp2 = ""
- for c in sorted(self.RHS):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + c
- return tmp + tmp2
-
-
-[docs]class NonStationaryFTS(fts.FTS):
- """NonStationaryFTS Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(NonStationaryFTS, self).__init__(**kwargs)
- self.name = "Non Stationary FTS"
- self.shortname = "NSFTS"
- self.detail = ""
- self.flrgs = {}
- self.method = kwargs.get('method','conditional')
- self.is_high_order = False
- if self.partitioner is not None:
- self.append_transformation(self.partitioner.transformation)
-
- if self.method == 'conditional':
- self.min_stack = [0, 0, 0]
- self.max_stack = [0, 0, 0]
- self.uod_clip = False
- self.order = 1
- self.min_order = 1
- self.max_lag = 1
- self.inputs = []
- self.forecasts = []
- self.residuals = []
- self.variance_residual = 0.
- self.mean_residual = 0.
- self.memory_window = kwargs.get("memory_window", 5)
-
-[docs] def generate_flrg(self, flrs, **kwargs):
- for flr in flrs:
- if flr.LHS.name in self.flrgs:
- self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
- else:
- self.flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS.name)
- self.flrgs[flr.LHS.name].append_rhs(flr.RHS.name)
-
- def _smooth(self, a):
- return .1 * a[0] + .3 * a[1] + .6 * a[2]
-
-[docs] def train(self, data, **kwargs):
-
- if self.method == 'unconditional':
- window_size = kwargs.get('parameters', 1)
- tmpdata = common.fuzzySeries(data, self.sets,
- self.partitioner.ordered_sets,
- window_size, method='fuzzy')
- else:
- tmpdata = common.fuzzySeries(data, self.sets,
- self.partitioner.ordered_sets,
- method='fuzzy', const_t=0)
-
- flrs = FLR.generate_non_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
-
- if self.method == 'conditional':
- self.forecasts = self.forecast(data, no_update=True)
- self.residuals = np.array(data[1:]) - np.array(self.forecasts[:-1])
-
- self.variance_residual = np.var(self.residuals) # np.max(self.residuals
- self.mean_residual = np.mean(self.residuals)
-
- self.residuals = self.residuals[-self.memory_window:].tolist()
- self.forecasts = self.forecasts[-self.memory_window:]
- self.inputs = np.array(data[-self.memory_window:]).tolist()
-
-[docs] def conditional_perturbation_factors(self, data, **kwargs):
- npart = len(self.partitioner.sets)
- _max = 0
- _min = 0
- if data < self.original_min:
- _min = data - self.original_min if data < 0 else self.original_min - data
- elif data > self.original_max:
- _max = data - self.original_max if data > 0 else self.original_max - data
- self.min_stack.pop(2)
- self.min_stack.insert(0, _min)
- _min = min(self.min_stack)
- self.max_stack.pop(2)
- self.max_stack.insert(0, _max)
- _max = max(self.max_stack)
-
- _range = (_max - _min)/2
-
- translate = np.linspace(_min, _max, npart)
-
- var = np.std(self.residuals)
-
- var = 0 if var < 1 else var
-
- loc = (self.mean_residual + np.mean(self.residuals))
-
- location = [_range + w + loc + k for k in np.linspace(-var,var, npart) for w in translate]
-
- scale = [abs(location[0] - location[2])]
- scale.extend([abs(location[k - 1] - location[k + 1]) for k in np.arange(1, npart)])
- scale.append(abs(location[-1] - location[-3]))
-
- perturb = [[location[k], scale[k]] for k in np.arange(npart)]
-
- return perturb
-
- def _fsset_key(self, ix):
- return self.partitioner.ordered_sets[ix]
-
- def _affected_sets(self, sample, perturb):
-
- if self.method == 'conditional':
-
- affected_sets = [[ct, self.sets[self._fsset_key(ct)].membership(sample, perturb[ct])]
- for ct in np.arange(len(self.partitioner.sets))
- if self.sets[self._fsset_key(ct)].membership(sample, perturb[ct]) > 0.0]
- if len(affected_sets) == 0:
-
- if sample < self.partitioner.lower_set().get_lower(perturb[0]):
- affected_sets.append([0, 1])
- elif sample > self.partitioner.upper_set().get_upper(perturb[-1]):
- affected_sets.append([len(self.sets) - 1, 1])
-
- else:
- affected_sets = [[ct, self.sets[self._fsset_key(ct)].membership(sample, perturb)]
- for ct in np.arange(len(self.partitioner.sets))
- if self.sets[self._fsset_key(ct)].membership(sample, perturb) > 0.0]
-
- if len(affected_sets) == 0:
-
- if sample < self.partitioner.lower_set().get_lower(perturb):
- affected_sets.append([0, 1])
- elif sample > self.partitioner.upper_set().get_upper(perturb):
- affected_sets.append([len(self.sets) - 1, 1])
-
- return affected_sets
-
-[docs] def forecast(self, ndata, **kwargs):
-
- time_displacement = kwargs.get("time_displacement",0)
-
- window_size = kwargs.get("window_size", 1)
-
- no_update = kwargs.get("no_update", False)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(0, l):
-
- sample = ndata[k]
-
- if self.method == 'unconditional':
- perturb = common.window_index(k + time_displacement, window_size)
- elif self.method == 'conditional':
- if not no_update:
- perturb = self.conditional_perturbation_factors(sample)
- else:
- perturb = [[0, 1] for k in np.arange(len(self.partitioner.sets))]
-
- affected_sets = self._affected_sets(sample, perturb)
-
- numerator = []
- denominator = []
-
- if len(affected_sets) == 1:
- ix = affected_sets[0][0]
- aset = self.partitioner.ordered_sets[ix]
- if aset in self.flrgs:
- numerator.append(self.flrgs[aset].get_midpoint(self.sets, perturb[ix]))
- else:
- fuzzy_set = self.sets[aset]
- numerator.append(fuzzy_set.get_midpoint(perturb[ix]))
- denominator.append(1)
- else:
- for aset in affected_sets:
- ix = aset[0]
- fs = self.partitioner.ordered_sets[ix]
- tdisp = perturb[ix]
- if fs in self.flrgs:
- numerator.append(self.flrgs[fs].get_midpoint(self.sets, tdisp) * aset[1])
- else:
- fuzzy_set = self.sets[fs]
- numerator.append(fuzzy_set.get_midpoint(tdisp) * aset[1])
- denominator.append(aset[1])
-
- if sum(denominator) > 0:
- pto = sum(numerator) / sum(denominator)
- else:
- pto = sum(numerator)
-
- ret.append(pto)
-
- if self.method == 'conditional' and not no_update:
- self.forecasts.append(pto)
- self.residuals.append(self.inputs[-1] - self.forecasts[-1])
- self.inputs.append(sample)
-
- self.inputs.pop(0)
- self.forecasts.pop(0)
- self.residuals.pop(0)
-
- return ret
-
-[docs] def forecast_interval(self, ndata, **kwargs):
-
- time_displacement = kwargs.get("time_displacement", 0)
-
- window_size = kwargs.get("window_size", 1)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(0, l):
-
- # print("input: " + str(ndata[k]))
-
- tdisp = common.window_index(k + time_displacement, window_size)
-
- affected_sets = [[self.sets[key], self.sets[key].membership(ndata[k], tdisp)]
- for key in self.partitioner.ordered_sets
- if self.sets[key].membership(ndata[k], tdisp) > 0.0]
-
- if len(affected_sets) == 0:
- affected_sets.append([common.check_bounds(ndata[k], self.partitioner, tdisp), 1.0])
-
- upper = []
- lower = []
-
- if len(affected_sets) == 1:
- aset = affected_sets[0][0]
- if aset.name in self.flrgs:
- lower.append(self.flrgs[aset.name].get_lower(tdisp))
- upper.append(self.flrgs[aset.name].get_upper(tdisp))
- else:
- lower.append(aset.get_lower(tdisp))
- upper.append(aset.get_upper(tdisp))
- else:
- for aset in affected_sets:
- if aset[0].name in self.flrgs:
- lower.append(self.flrgs[aset[0].name].get_lower(tdisp) * aset[1])
- upper.append(self.flrgs[aset[0].name].get_upper(tdisp) * aset[1])
- else:
- lower.append(aset[0].get_lower(tdisp) * aset[1])
- upper.append(aset[0].get_upper(tdisp) * aset[1])
-
-
- ret.append([sum(lower), sum(upper)])
-
- return ret
-
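A minimal usage sketch of NonStationaryFTS with the conditional perturbation method. The synthetic series and the generic fit() call (inherited from the pyFTS base FTS class) are illustrative assumptions:

import numpy as np
from pyFTS.models.nonstationary import nsfts, partitioners

data = np.cumsum(np.random.normal(0, 1, 500))

# non-stationary wrapper around a grid partitioner (see the partitioners module below)
fs = partitioners.simplenonstationary_gridpartitioner_builder(data[:400], npart=15, transformation=None)

model = nsfts.NonStationaryFTS(partitioner=fs, method='conditional')
model.fit(data[:400])

forecasts = model.forecast(data[400:])
intervals = model.forecast_interval(data[400:])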
Source code for pyFTS.models.nonstationary.partitioners
-import numpy as np
-from pyFTS.partitioners import partitioner
-from pyFTS.models.nonstationary import common, perturbation
-from pyFTS.common import FuzzySet as stationary_fs
-
-
-[docs]class PolynomialNonStationaryPartitioner(partitioner.Partitioner):
- """
- Non Stationary Universe of Discourse Partitioner with polynomial perturbations of the fuzzy sets
- """
-
- def __init__(self, data, part, **kwargs):
- """"""
- super(PolynomialNonStationaryPartitioner, self).__init__(name=part.name, data=data, npart=part.partitions,
- func=part.membership_function, names=part.setnames,
- prefix=part.prefix, transformation=part.transformation,
- indexer=part.indexer, preprocess=False)
-
- self.sets = {}
-
- loc_params, wid_params = self.get_polynomial_perturbations(data, **kwargs)
-
- if self.ordered_sets is None and self.setnames is not None:
- self.ordered_sets = part.setnames
- else:
- self.ordered_sets = stationary_fs.set_ordered(part.sets)
-
- for ct, key in enumerate(self.ordered_sets):
- set = part.sets[key]
- loc_roots = np.roots(loc_params[ct])[0]
- wid_roots = np.roots(wid_params[ct])[0]
- tmp = common.FuzzySet(set.name, set.mf, set.parameters,
- location=perturbation.polynomial,
- location_params=loc_params[ct],
- location_roots=loc_roots, #**kwargs)
- width=perturbation.polynomial,
- width_params=wid_params[ct],
- width_roots=wid_roots, **kwargs)
-
- self.sets[set.name] = tmp
-
-[docs] def poly_width(self, par1, par2, rng, deg):
- a = np.polyval(par1, rng)
- b = np.polyval(par2, rng)
- diff = [b[k] - a[k] for k in rng]
- tmp = np.polyfit(rng, diff, deg=deg)
- return tmp
-
-
-
-
-
-[docs] def get_polynomial_perturbations(self, data, **kwargs):
- w = kwargs.get("window_size", int(len(data) / 5))
- degree = kwargs.get("degree", 2)
- xmax = [data[0]]
- tmax = [0]
- xmin = [data[0]]
- tmin = [0]
-
- l = len(data)
-
- for i in np.arange(0, l, w):
- sample = data[i:i + w]
- tx = max(sample)
- xmax.append(tx)
- tmax.append(np.ravel(np.argwhere(data == tx)).tolist()[0])
- tn = min(sample)
- xmin.append(tn)
- tmin.append(np.ravel(np.argwhere(data == tn)).tolist()[0])
-
- cmax = np.polyfit(tmax, xmax, deg=degree)
- cmin = np.polyfit(tmin, xmin, deg=degree)
-
- cmed = []
-
- for d in np.arange(0, degree + 1):
- cmed.append(np.linspace(cmin[d], cmax[d], self.partitions)[1:self.partitions - 1])
-
- loc_params = [cmin.tolist()]
- for i in np.arange(0, self.partitions - 2):
- tmp = [cmed[k][i] for k in np.arange(0, degree + 1)]
- loc_params.append(tmp)
- loc_params.append(cmax.tolist())
-
- rng = np.arange(0, l)
-
- clen = []
-
- for i in np.arange(1, self.partitions-1):
- tmp = self.poly_width(loc_params[i - 1], loc_params[i + 1], rng, degree)
- clen.append(tmp)
-
- tmp = self.poly_width(loc_params[0], loc_params[1], rng, degree)
- clen.insert(0, tmp)
-
- tmp = self.poly_width(loc_params[self.partitions-2], loc_params[self.partitions-1], rng, degree)
- clen.append(tmp)
-
- tmp = (loc_params, clen)
-
- return tmp
-
-
-
-
-[docs]class SimpleNonStationaryPartitioner(partitioner.Partitioner):
- """
- Non Stationary Universe of Discourse Partitioner built by wrapping an existing stationary partitioner
- """
-
- def __init__(self, data, part, **kwargs):
- """"""
- super(SimpleNonStationaryPartitioner, self).__init__(name=part.name, data=data, npart=part.partitions,
- func=part.membership_function, names=part.setnames,
- prefix=part.prefix, transformation=part.transformation,
- indexer=part.indexer)#, preprocess=False)
-
- for key in part.sets.keys():
- set = part.sets[key]
- tmp = common.FuzzySet(set.name, set.mf, set.parameters, **kwargs)
- tmp.centroid = set.centroid
-
- self.sets[key] =tmp
-
- self.ordered_sets = stationary_fs.set_ordered(self.sets)
-
-
-
-
-[docs]def simplenonstationary_gridpartitioner_builder(data, npart, transformation):
- from pyFTS.partitioners import Grid
- from pyFTS.models.nonstationary import perturbation, partitioners
-
- tmp_fs = Grid.GridPartitioner(data=data, npart=npart, transformation=transformation)
- fs = partitioners.SimpleNonStationaryPartitioner(data, tmp_fs,
- location=perturbation.polynomial,
- location_params=[1, 0],
- location_roots=0,
- width=perturbation.polynomial,
- width_params=[1, 0],
- width_roots=0)
- return fs
-
Source code for pyFTS.models.nonstationary.perturbation
-"""
-Perturbation functions for Non Stationary Fuzzy Sets
-"""
-
-import numpy as np
-from pyFTS import *
-from pyFTS.common import FuzzySet, Membership
-
-
-
-
-
-
-
-
-
-
-
-
-
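The function bodies of this module were lost in the extraction above. Based on how perturbation.polynomial is invoked elsewhere in this file (polynomial coefficients such as [1, 0] evaluated at a time displacement, cf. np.polyval in pyFTS.models.nonstationary.util), a plausible minimal form is sketched below; treat it as a reconstruction under that assumption, not the library's verbatim code:

import numpy as np

def polynomial(x, w):
    # evaluate the perturbation polynomial with coefficients w at displacement x
    return np.polyval(w, x)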
Source code for pyFTS.models.nonstationary.util
-import numpy as np
-import pandas as pd
-import matplotlib.colors as pltcolors
-import matplotlib.pyplot as plt
-from pyFTS.common import Membership, Util
-
-
-[docs]def plot_sets(partitioner, start=0, end=10, step=1, tam=[5, 5], colors=None,
- save=False, file=None, axes=None, data=None, window_size = 1, only_lines=False):
-
- range = np.arange(start,end,step)
- ticks = []
- if axes is None:
- fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
-
- for ct, key in enumerate(partitioner.ordered_sets):
- fset = partitioner.sets[key]
- if not only_lines:
- for t in range:
- tdisp = t - (t % window_size)
- fset.membership(0, tdisp)
- param = fset.perturbated_parameters[str(tdisp)]
-
- if fset.mf == Membership.trimf:
- if t == start:
- line = axes.plot([t, t+1, t], param, label=fset.name)
- fset.metadata['color'] = line[0].get_color()
- else:
- axes.plot([t, t + 1, t], param,c=fset.metadata['color'])
-
- ticks.extend(["t+"+str(t),""])
- else:
- tmp = []
- for t in range:
- tdisp = t - (t % window_size)
- fset.membership(0, tdisp)
- param = fset.perturbated_parameters[str(tdisp)]
- tmp.append(np.polyval(param, tdisp))
- axes.plot(range, tmp, ls="--", c="blue")
-
- axes.set_ylabel("Universe of Discourse")
- axes.set_xlabel("Time")
- plt.xticks([k for k in range], ticks, rotation='vertical')
-
- handles0, labels0 = axes.get_legend_handles_labels()
- lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
-
- if data is not None:
- axes.plot(np.arange(start, start + len(data), 1), data,c="black")
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
-
-[docs]def plot_sets_conditional(model, data, step=1, size=[5, 5], colors=None,
- save=False, file=None, axes=None, fig=None):
- range = np.arange(0, len(data), step)
- ticks = []
- if axes is None:
- fig, axes = plt.subplots(nrows=1, ncols=1, figsize=size)
-
- for t in range:
- model.forecast([data[t]])
- perturb = model.conditional_perturbation_factors(data[t])
-
- for ct, key in enumerate(model.partitioner.ordered_sets):
- set = model.partitioner.sets[key]
- set.perturbate_parameters(perturb[ct])
- param = set.perturbated_parameters[str(perturb[ct])]
-
- if set.mf == Membership.trimf:
- if t == 0:
- line = axes.plot([t, t+1, t], param, label=set.name)
- set.metadata['color'] = line[0].get_color()
- else:
- axes.plot([t, t + 1, t], param,c=set.metadata['color'])
-
- #ticks.extend(["t+"+str(t),""])
-
- axes.set_ylabel("Universe of Discourse")
- axes.set_xlabel("Time")
- #plt.xticks([k for k in range], ticks, rotation='vertical')
-
- handles0, labels0 = axes.get_legend_handles_labels()
- lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
-
- if data is not None:
- axes.plot(np.arange(0, len(data), 1), data,c="black")
-
- plt.tight_layout()
-
- Util.show_and_save_image(fig, file, save)
-
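A short plotting sketch, assuming fs is a non-stationary partitioner and model/data are the objects from the earlier nsfts example:

from pyFTS.models.nonstationary import util

# evolution of the perturbed fuzzy sets over the first 20 time displacements
util.plot_sets(fs, start=0, end=20, step=1, tam=[10, 5], data=data[:20])

# for a conditional model, plot the sets conditioned on the observed data
util.plot_sets_conditional(model, data[:50], size=[10, 5])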
Source code for pyFTS.models.pwfts
-#!/usr/bin/python
-# -*- coding: utf8 -*-
-
-import numpy as np
-import pandas as pd
-import math
-from operator import itemgetter
-from pyFTS.common import FLR, FuzzySet
-from pyFTS.models import hofts, ifts
-from pyFTS.probabilistic import ProbabilityDistribution
-from itertools import product
-
-
-[docs]class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
- """High Order Probabilistic Weighted Fuzzy Logical Relationship Group"""
- def __init__(self, order):
- super(ProbabilisticWeightedFLRG, self).__init__(order)
- self.RHS = {}
- self.frequency_count = 0.0
- self.Z = None
-
-[docs] def get_membership(self, data, sets):
- if isinstance(data, (np.ndarray, list, tuple, set)):
- return np.nanprod([sets[key].membership(data[count])
- for count, key in enumerate(self.LHS, start=0)])
- else:
- return sets[self.LHS[0]].membership(data)
-
-[docs] def append_rhs(self, c, **kwargs):
- count = kwargs.get('count', 1.0)
- self.frequency_count += count
- if c in self.RHS:
- self.RHS[c] += count
- else:
- self.RHS[c] = count
-
-[docs] def lhs_conditional_probability(self, x, sets, norm, uod, nbins):
- pk = self.frequency_count / norm
-
- tmp = pk * (self.get_membership(x, sets) / self.partition_function(sets, uod, nbins=nbins))
-
- return tmp
-
-
-
-[docs] def rhs_conditional_probability(self, x, sets, uod, nbins):
- total = 0.0
- for rhs in self.RHS.keys():
- set = sets[rhs]
- wi = self.rhs_unconditional_probability(rhs)
- mv = set.membership(x) / set.partition_function(uod, nbins=nbins)
- total += wi * mv
-
- return total
-
-[docs] def partition_function(self, sets, uod, nbins=100):
- if self.Z is None:
- self.Z = 0.0
- for k in np.linspace(uod[0], uod[1], nbins):
- for key in self.LHS:
- self.Z += sets[key].membership(k)
-
- return self.Z
-
-[docs] def get_midpoint(self, sets):
- '''Return the expectation of the PWFLRG, the weighted sum'''
- if self.midpoint is None:
- self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
- for s in self.RHS.keys()]))
-
- return self.midpoint
-
-[docs] def get_upper(self, sets):
- if self.upper is None:
- self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
- for s in self.RHS.keys()]))
-
- return self.upper
-
-[docs] def get_lower(self, sets):
- if self.lower is None:
- self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
- for s in self.RHS.keys()]))
-
- return self.lower
-
- def __str__(self):
- tmp2 = ""
- for c in sorted(self.RHS.keys()):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ", "
- tmp2 = tmp2 + "(" + str(round(self.RHS[c] / self.frequency_count, 3)) + ")" + c
- return self.get_key() + " -> " + tmp2
-
-
-[docs]class ProbabilisticWeightedFTS(ifts.IntervalFTS):
- """High Order Probabilistic Weighted Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(ProbabilisticWeightedFTS, self).__init__(**kwargs)
- self.shortname = "PWFTS"
- self.name = "Probabilistic FTS"
- self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
- self.flrgs = {}
- self.global_frequency_count = 0
- self.has_point_forecasting = True
- self.has_interval_forecasting = True
- self.has_probability_forecasting = True
- self.is_high_order = True
- self.min_order = 1
- self.auto_update = kwargs.get('update',False)
- self.configure_lags(**kwargs)
-
-[docs] def train(self, data, **kwargs):
-
- self.configure_lags(**kwargs)
- parameters = kwargs.get('parameters','fuzzy')
-
- if parameters == 'monotonic':
- tmpdata = self.partitioner.fuzzyfy(data, mode='sets', method='maximum')
- flrs = FLR.generate_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
- else:
- self.generate_flrg(data)
-
-[docs] def generate_lhs_flrg(self, sample, explain=False):
- nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
- for k in sample]
-
- return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
-
-[docs] def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
- lags = []
-
- flrgs = []
-
- for ct, o in enumerate(self.lags):
- lhs = sample[o - 1]
- lags.append( lhs )
-
- if explain:
- print("\t (Lag {}) {} -> {} \n".format(o, sample[o-1], lhs))
-
- # Trace the possible paths
- for path in product(*lags):
- flrg = ProbabilisticWeightedFLRG(self.order)
-
- for lhs in path:
- flrg.append_lhs(lhs)
-
- flrgs.append(flrg)
-
- return flrgs
-
-[docs] def generate_flrg(self, data):
- l = len(data)
- for k in np.arange(self.max_lag, l):
- if self.dump: print("FLR: " + str(k))
-
- sample = data[k - self.max_lag: k]
-
- flrgs = self.generate_lhs_flrg(sample)
-
- for flrg in flrgs:
-
- lhs_mv = flrg.get_membership(sample, self.partitioner.sets)
-
- if flrg.get_key() not in self.flrgs:
- self.flrgs[flrg.get_key()] = flrg;
-
- fuzzyfied = self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy',
- alpha_cut=self.alpha_cut)
-
- mvs = []
- for set, mv in fuzzyfied:
- self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
- mvs.append(mv)
-
- tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
-
- self.global_frequency_count += tmp_fq
-
-
-
-
-
-[docs] def add_new_PWFLGR(self, flrg):
- if flrg.get_key() not in self.flrgs:
- tmp = ProbabilisticWeightedFLRG(self.order)
- for fs in flrg.LHS: tmp.append_lhs(fs)
- tmp.append_rhs(flrg.LHS[-1])
- self.flrgs[tmp.get_key()] = tmp;
- self.global_frequency_count += 1
-
-[docs] def flrg_lhs_unconditional_probability(self, flrg):
- if flrg.get_key() in self.flrgs:
- return self.flrgs[flrg.get_key()].frequency_count / self.global_frequency_count
- else:
- return 0.0
- #self.add_new_PWFLGR(flrg)
- #return self.flrg_lhs_unconditional_probability(flrg)
-
-[docs] def flrg_lhs_conditional_probability(self, x, flrg):
- mv = flrg.get_membership(x, self.partitioner.sets)
- pb = self.flrg_lhs_unconditional_probability(flrg)
- return mv * pb
-
-[docs] def get_midpoint(self, flrg):
- if flrg.get_key() in self.flrgs:
- tmp = self.flrgs[flrg.get_key()]
- ret = tmp.get_midpoint(self.partitioner.sets) #sum(np.array([tmp.rhs_unconditional_probability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
- else:
- if len(flrg.LHS) > 0:
- pi = 1 / len(flrg.LHS)
- ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
- else:
- ret = np.nan
- return ret
-
-[docs] def flrg_rhs_conditional_probability(self, x, flrg):
-
- if flrg.get_key() in self.flrgs:
- _flrg = self.flrgs[flrg.get_key()]
- cond = []
- for s in _flrg.RHS.keys():
- _set = self.partitioner.sets[s]
- tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
- cond.append(tmp)
- ret = sum(np.array(cond))
- else:
- pi = 1 / len(flrg.LHS)
- ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
- return ret
-
-[docs] def get_upper(self, flrg):
- if flrg.get_key() in self.flrgs:
- tmp = self.flrgs[flrg.get_key()]
- ret = tmp.get_upper(self.partitioner.sets)
- else:
- ret = 0
- return ret
-
-[docs] def get_lower(self, flrg):
- if flrg.get_key() in self.flrgs:
- tmp = self.flrgs[flrg.get_key()]
- ret = tmp.get_lower(self.partitioner.sets)
- else:
- ret = 0
- return ret
-
-[docs] def forecast(self, data, **kwargs):
- method = kwargs.get('method','heuristic')
-
- l = len(data)
-
- ret = []
-
- for k in np.arange(self.max_lag - 1, l):
- sample = data[k - (self.max_lag - 1): k + 1]
-
- if method == 'heuristic':
- ret.append(self.point_heuristic(sample, **kwargs))
- elif method == 'expected_value':
- ret.append(self.point_expected_value(sample, **kwargs))
- else:
- raise ValueError("Unknown point forecasting method!")
-
- if self.auto_update and k > self.order+1: self.update_model(data[k - self.order - 1 : k])
-
- return ret
-
-[docs] def point_heuristic(self, sample, **kwargs):
-
- explain = kwargs.get('explain', False)
-
- if explain:
- print("Fuzzyfication \n")
-
- flrgs = self.generate_lhs_flrg(sample, explain)
-
- mp = []
- norms = []
-
- if explain:
- print("Rules:\n")
-
- for flrg in flrgs:
- norm = self.flrg_lhs_conditional_probability(sample, flrg)
-
- if norm == 0:
- norm = self.flrg_lhs_unconditional_probability(flrg)
-
-
- if explain:
- print("\t {} \t Midpoint: {}\t Norm: {}\n".format(str(self.flrgs[flrg.get_key()]),
- self.get_midpoint(flrg), norm))
-
- mp.append(norm * self.get_midpoint(flrg))
- norms.append(norm)
-
- norm = sum(norms)
-
- final = sum(mp) / norm if norm != 0 else 0
-
- if explain:
- print("Deffuzyfied value: {} \n".format(final))
- return final
-
-[docs] def point_expected_value(self, sample, **kwargs):
- explain = kwargs.get('explain', False)
-
- dist = self.forecast_distribution(sample)[0]
-
- final = dist.expected_value()
- return final
-
-[docs] def forecast_interval(self, ndata, **kwargs):
-
- method = kwargs.get('method','heuristic')
- alpha = kwargs.get('alpha', 0.05)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(self.max_lag - 1, l):
-
- sample = ndata[k - (self.max_lag - 1): k + 1]
-
- if method == 'heuristic':
- ret.append(self.interval_heuristic(sample))
- elif method == 'quantile':
- ret.append(self.interval_quantile(sample, alpha))
- else:
- raise ValueError("Unknown interval forecasting method!")
-
- return ret
-
-[docs] def interval_quantile(self, ndata, alpha):
- dist = self.forecast_distribution(ndata)
- itvl = dist[0].quantile([alpha, 1.0 - alpha])
- return itvl
-
-[docs] def interval_heuristic(self, sample):
-
- flrgs = self.generate_lhs_flrg(sample)
-
- up = []
- lo = []
- norms = []
- for flrg in flrgs:
- norm = self.flrg_lhs_conditional_probability(sample, flrg)
- if norm == 0:
- norm = self.flrg_lhs_unconditional_probability(flrg)
- up.append(norm * self.get_upper(flrg))
- lo.append(norm * self.get_lower(flrg))
- norms.append(norm)
-
- # build the forecast interval from the weighted bounds
- norm = sum(norms)
- if norm == 0:
- return [0, 0]
- else:
- lo_ = sum(lo) / norm
- up_ = sum(up) / norm
- return [lo_, up_]
-
-[docs] def forecast_distribution(self, ndata, **kwargs):
-
- smooth = kwargs.get("smooth", "none")
-
- l = len(ndata)
- uod = self.get_UoD()
-
- if 'bins' in kwargs:
- _bins = kwargs.pop('bins')
- nbins = len(_bins)
- else:
- nbins = kwargs.get("num_bins", 100)
- _bins = np.linspace(uod[0], uod[1], nbins)
-
- ret = []
-
- for k in np.arange(self.max_lag - 1, l):
- sample = ndata[k - (self.max_lag - 1): k + 1]
-
- flrgs = self.generate_lhs_flrg(sample)
-
- if 'type' in kwargs:
- kwargs.pop('type')
-
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
-
- for bin in _bins:
- num = []
- den = []
- for s in flrgs:
- if s.get_key() in self.flrgs:
- flrg = self.flrgs[s.get_key()]
- pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins)
- wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins)
- num.append(wi * pk)
- den.append(pk)
- else:
- num.append(0.0)
- den.append(0.000000001)
- pf = sum(num) / sum(den)
-
- dist.set(bin, pf)
-
- ret.append(dist)
-
- return ret
-
- def __check_point_bounds(self, point):
- lower_set = self.partitioner.lower_set()
- upper_set = self.partitioner.upper_set()
- return point <= lower_set.lower or point >= upper_set.upper
-
-[docs] def forecast_ahead(self, data, steps, **kwargs):
-
- l = len(data)
-
- start = kwargs.get('start', self.max_lag)
-
- ret = data[start - self.max_lag: start].tolist()
-
- for k in np.arange(self.max_lag, steps+self.max_lag):
-
- if self.__check_point_bounds(ret[-1]) :
- ret.append(ret[-1])
- else:
- mp = self.forecast(ret[k - self.max_lag: k], **kwargs)
- ret.append(mp[0])
-
- return ret[self.max_lag:]
-
- def __check_interval_bounds(self, interval):
- if len(self.transformations) > 0:
- lower_set = self.partitioner.lower_set()
- upper_set = self.partitioner.upper_set()
- return interval[0] <= lower_set.lower and interval[1] >= upper_set.upper
- elif len(self.transformations) == 0:
- return interval[0] <= self.original_min and interval[1] >= self.original_max
-
-[docs] def forecast_ahead_interval(self, data, steps, **kwargs):
-
- l = len(data)
-
- start = kwargs.get('start', self.max_lag)
-
- sample = data[start - self.max_lag: start]
-
- ret = [[k, k] for k in sample]
-
- ret.append(self.forecast_interval(sample)[0])
-
- for k in np.arange(self.max_lag+1, steps+self.max_lag):
-
- if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
- ret.append(ret[-1])
- else:
- lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.max_lag, k)], **kwargs)
- upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.max_lag, k)], **kwargs)
-
- ret.append([np.min(lower), np.max(upper)])
-
- return ret[self.order:]
-
-[docs] def forecast_ahead_distribution(self, ndata, steps, **kwargs):
-
- ret = []
-
- smooth = kwargs.get("smooth", "none")
-
- uod = self.get_UoD()
-
- if 'bins' in kwargs:
- _bins = kwargs.pop('bins')
- nbins = len(_bins)
- else:
- nbins = kwargs.get("num_bins", 100)
- _bins = np.linspace(uod[0], uod[1], nbins)
-
- start = kwargs.get('start', self.max_lag)
-
- sample = ndata[start - self.max_lag: start]
-
- for dat in sample:
- if 'type' in kwargs:
- kwargs.pop('type')
- tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
- tmp.set(dat, 1.0)
- ret.append(tmp)
-
- dist = self.forecast_distribution(sample, bins=_bins)[0]
-
- ret.append(dist)
-
- for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
- dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
-
- lags = []
-
- # Find all bins of past distributions with probability greater than zero
-
- for ct, lag in enumerate(self.lags):
- dd = ret[k - lag]
- vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
- lags.append( sorted(vals) )
-
-
- # Trace all possible combinations between the bins of past distributions
-
- for path in product(*lags):
-
- # get the combined probabilities for this path
- pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct])
- for ct, lag in enumerate(self.lags)])
-
-
- d = self.forecast_distribution(path)[0]
-
- for bin in _bins:
- dist.set(bin, dist.density(bin) + pk * d.density(bin))
-
- ret.append(dist)
-
- return ret[self.order:]
-
- def __str__(self):
- tmp = self.name + ":\n"
- for r in sorted(self.flrgs.keys()):
- p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3)
- tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
- return tmp
-
-
-[docs]def visualize_distributions(model, **kwargs):
- import matplotlib.pyplot as plt
- from matplotlib import gridspec
- import seaborn as sns
-
- ordered_sets = model.partitioner.ordered_sets
- ftpg_keys = sorted(model.flrgs.keys(), key=lambda x: model.flrgs[x].get_midpoint(model.sets))
-
- lhs_probs = [model.flrg_lhs_unconditional_probability(model.flrgs[k])
- for k in ftpg_keys]
-
- mat = np.zeros((len(ftpg_keys), len(ordered_sets)))
- for row, w in enumerate(ftpg_keys):
- for col, k in enumerate(ordered_sets):
- if k in model.flrgs[w].RHS:
- mat[row, col] = model.flrgs[w].rhs_unconditional_probability(k)
-
- size = kwargs.get('size', (5,10))
-
- fig = plt.figure(figsize=size)
-
- gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4])
- ax1 = plt.subplot(gs[0])
- sns.barplot(x='y', y='x', color='darkblue', data={'x': ftpg_keys, 'y': lhs_probs}, ax=ax1)
- ax1.set_ylabel("LHS Probabilities")
-
- ind_sets = range(len(ordered_sets))
- ax = plt.subplot(gs[1])
- sns.heatmap(mat, cmap='Blues', ax=ax, yticklabels=False)
- ax.set_title("RHS probabilities")
- ax.set_xticks(ind_sets)
- ax.set_xticklabels(ordered_sets)
- ax.grid(True)
- ax.xaxis.set_tick_params(rotation=90)
-
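A minimal usage sketch of the probabilistic weighted model above, assuming a synthetic series and the generic fit() front-end from the pyFTS base class:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models import pwfts

data = np.cumsum(np.random.normal(0, 1, 1000))
part = Grid.GridPartitioner(data=data[:800], npart=20)

model = pwfts.ProbabilisticWeightedFTS(partitioner=part, order=2)
model.fit(data[:800])

points = model.forecast(data[800:850])
intervals = model.forecast_interval(data[800:850])
distributions = model.forecast_distribution(data[800:850])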
Source code for pyFTS.models.sadaei
-"""
-First Order Exponentially Weighted Fuzzy Time Series by Sadaei et al. (2013)
-
-H. J. Sadaei, R. Enayatifar, A. H. Abdullah, and A. Gani, “Short-term load forecasting using a hybrid model with a
-refined exponentially weighted fuzzy time series and an improved harmony search,” Int. J. Electr. Power Energy Syst., vol. 62, pp. 118–129, 2014.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet,FLR,fts, flrg
-
-default_c = 1.1
-
-
-[docs]class ExponentialyWeightedFLRG(flrg.FLRG):
- """First Order Exponentially Weighted Fuzzy Logical Relationship Group"""
- def __init__(self, LHS, **kwargs):
- super(ExponentialyWeightedFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = []
- self.count = 0.0
- self.c = kwargs.get("c",default_c)
- self.w = None
-
-[docs] def append_rhs(self, c, **kwargs):
- count = kwargs.get('count', 1.0)
- self.RHS.append(c)
- self.count += count
-
-[docs] def weights(self):
- if self.w is None:
- wei = [self.c ** k for k in np.arange(0.0, self.count, 1.0)]
- tot = sum(wei)
- self.w = np.array([k / tot for k in wei])
- return self.w
-
- def __str__(self):
- tmp = self.LHS + " -> "
- tmp2 = ""
- cc = 0
- wei = [self.c ** k for k in np.arange(0.0, self.count, 1.0)]
- tot = sum(wei)
- for c in sorted(self.RHS):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + c + "(" + str(wei[cc] / tot) + ")"
- cc = cc + 1
- return tmp + tmp2
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class ExponentialyWeightedFTS(fts.FTS):
- """First Order Exponentially Weighted Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(ExponentialyWeightedFTS, self).__init__(order=1, name="EWFTS", **kwargs)
- self.name = "Exponentialy Weighted FTS"
- self.detail = "Sadaei"
- self.c = kwargs.get('c', default_c)
-
-[docs] def generate_flrg(self, flrs, c):
- for flr in flrs:
- if flr.LHS in self.flrgs:
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
- else:
- self.flrgs[flr.LHS] = ExponentialyWeightedFLRG(flr.LHS, c=c);
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
-
-[docs] def train(self, data, **kwargs):
- tmpdata = self.partitioner.fuzzyfy(data, method='maximum', mode='sets')
- flrs = FLR.generate_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs, self.c)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- explain = kwargs.get('explain', False)
-
- if self.partitioner is not None:
- ordered_sets = self.partitioner.ordered_sets
- else:
- ordered_sets = FuzzySet.set_ordered(self.sets)
-
- data = np.array(ndata)
-
- l = len(ndata)
-
- ret = []
-
- for k in np.arange(0, l):
-
- actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
-
- if explain:
- print("Fuzzyfication:\n\n {} -> {} \n".format(ndata[k], actual.name))
-
- if actual.name not in self.flrgs:
- ret.append(actual.centroid)
-
- if explain:
- print("Rules:\n\n {} -> {} (Naïve)\t Midpoint: {} \n\n".format(actual.name, actual.name,actual.centroid))
-
- else:
- flrg = self.flrgs[actual.name]
- mp = flrg.get_midpoints(self.sets)
-
- final = mp.dot(flrg.weights())
-
- ret.append(final)
-
- if explain:
- print("Rules:\n\n {} \n\n ".format(str(flrg)))
- print("Midpoints: \n\n {}\n\n".format(mp))
-
- print("Deffuzyfied value: {} \n".format(final))
-
- return ret
-
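For intuition, the exponential weights computed by weights() above grow by a factor c per recurrence and are then normalized. A small illustrative check with the default c = 1.1 and three recurrences (not part of the module):

import numpy as np

c, count = 1.1, 3
wei = [c ** k for k in np.arange(0.0, count, 1.0)]   # [1.0, 1.1, 1.21]
w = np.array([k / sum(wei) for k in wei])            # approx. [0.302, 0.332, 0.366]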
Source code for pyFTS.models.seasonal.SeasonalIndexer
-import numpy as np
-import pandas as pd
-from pyFTS.models.seasonal import common
-
-
-[docs]class SeasonalIndexer(object):
- """
- Seasonal Indexer. Responsible for finding the seasonal index of a data point inside its data set
- """
- def __init__(self,num_seasons, **kwargs):
- self.num_seasons = num_seasons
- self.name = kwargs.get("name","")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-[docs]class LinearSeasonalIndexer(SeasonalIndexer):
- """Use the data array/list position to index the seasonality """
- def __init__(self, seasons, units, ignore=None, **kwargs):
- """
- Indexer for array/list position
- :param seasons: A list with the season groups (e.g., 7 for week, 30 for month, etc.)
- :param units: A list with the units used for each season group, the default is 1 for each
- :param ignore: A list of booleans indicating which season groups should be omitted from the returned index
- :param kwargs:
- """
- super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
- self.seasons = seasons
- self.units = units
- self.ignore = ignore
-
-[docs] def get_season_of_data(self,data):
- return self.get_season_by_index(np.arange(0, len(data)).tolist())
-
-[docs] def get_season_by_index(self, index):
- ret = []
- if not isinstance(index, (list, np.ndarray)):
- if self.num_seasons == 1:
- season = (index // self.units[0]) % self.seasons[0]
- else:
- season = []
- for ct, seasonality in enumerate(self.seasons, start=0):
- tmp = (index // self.units[ct]) % self.seasons[ct]
- if not self.ignore[ct]:
- season.append(tmp)
- ret.append(season)
- else:
- for ix in index:
- if self.num_seasons == 1:
- season = (ix // self.units[0]) % self.seasons[0]
- else:
- season = []
- for ct, seasonality in enumerate(self.seasons, start=0):
- tmp = (ix // self.units[ct]) % self.seasons[ct]
- if not self.ignore[ct]:
- season.append(tmp)
- ret.append(season)
-
- return ret
-
-[docs] def get_index_by_season(self, indexes):
- ix = 0;
-
- for count,season in enumerate(self.seasons):
- ix += season*(indexes[count])
-
- #ix += indexes[-1]
-
- return ix
-
-
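A small illustrative example of the positional indexing arithmetic of LinearSeasonalIndexer (the season groups chosen here, hour-of-day and day-of-week over an hourly series, are assumptions for the example):

from pyFTS.models.seasonal.SeasonalIndexer import LinearSeasonalIndexer

ix = LinearSeasonalIndexer(seasons=[24, 7], units=[1, 24], ignore=[False, False])
ix.get_season_by_index(30)   # -> [[6, 1]]  since (30 // 1) % 24 = 6 and (30 // 24) % 7 = 1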
-
-
-[docs]class DataFrameSeasonalIndexer(SeasonalIndexer):
- """Use the Pandas.DataFrame index position to index the seasonality """
- def __init__(self,index_fields,index_seasons, data_field,**kwargs):
- """
-
- :param index_fields: DataFrame field to use as index
- :param index_seasons: A list with the season groups, i.e., multiples of positions that are considered one season (e.g., 7 for week, 30 for month, etc.)
- :param data_field: DataFrame field to use as data
- :param kwargs:
- """
- super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
- self.fields = index_fields
- self.seasons = index_seasons
- self.data_field = data_field
-
-[docs] def get_season_of_data(self,data):
- #data = data.copy()
- ret = []
- for ix in data.index:
- season = []
- for c, f in enumerate(self.fields, start=0):
- if self.seasons[c] is None:
- season.append(data[f][ix])
- else:
- a = data[f][ix]
- season.append(a // self.seasons[c])
- ret.append(season)
- return ret
-
-
-
-[docs] def get_data_by_season(self, data, indexes):
- for season in indexes:
- for c, f in enumerate(self.fields, start=0):
- if self.seasons[c] is None:
- data = data[data[f]== season[c]]
- else:
- data = data[(data[f] // self.seasons[c]) == season[c]]
- return data[self.data_field]
-
-
-
-
-
-
-
-
-[docs]class DateTimeSeasonalIndexer(SeasonalIndexer):
- """Use a Pandas.DataFrame date field to index the seasonality """
- def __init__(self,date_field, index_fields, index_seasons, data_field,**kwargs):
- """
-
- :param date_field: DataFrame field that contains the datetime field used on index
- :param index_fields: List with common.DateTime fields
- :param index_seasons: Multiples of index_fields, the default is 1
- :param data_field: DataFrame field with the time series data
- :param kwargs:
- """
- super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
- self.fields = index_fields
- self.seasons = index_seasons
- self.data_field = data_field
- self.date_field = date_field
-
-[docs] def get_season_of_data(self, data):
-
- ret = []
-
- if isinstance(data, pd.DataFrame):
- for ix in data.index:
- date = data[self.date_field][ix]
- season = []
- for c, f in enumerate(self.fields, start=0):
- tmp = common.strip_datepart(date, f)
- if self.seasons[c] is not None:
- tmp = tmp // self.seasons[c]
- season.append(tmp)
- ret.append(season)
-
- elif isinstance(data, pd.Series):
- date = data[self.date_field]
- season = []
- for c, f in enumerate(self.fields, start=0):
- season.append(common.strip_datepart(date, f, self.seasons[c]))
- ret.append(season)
-
- return ret
-
-
-
-
-
-
-
-
-
-[docs] def get_index(self, data):
- return data[self.date_field].tolist() if isinstance(data, pd.DataFrame) else data[self.date_field]
-
-
-
-
-
Source code for pyFTS.models.seasonal.cmsfts
-import numpy as np
-from pyFTS.common import FuzzySet, FLR
-from pyFTS.models.seasonal import sfts
-from pyFTS.models import chen
-
-
-[docs]class ContextualSeasonalFLRG(sfts.SeasonalFLRG):
- """
- Contextual Seasonal Fuzzy Logical Relationship Group
- """
- def __init__(self, seasonality):
- super(ContextualSeasonalFLRG, self).__init__(seasonality)
- self.RHS = {}
-
-[docs] def append_rhs(self, flr, **kwargs):
- if flr.LHS in self.RHS:
- self.RHS[flr.LHS].append_rhs(flr.RHS)
- else:
- self.RHS[flr.LHS] = chen.ConventionalFLRG(flr.LHS)
- self.RHS[flr.LHS].append_rhs(flr.RHS)
-
- def __str__(self):
- tmp = str(self.LHS) + ": \n "
- tmp2 = "\t"
- for r in sorted(self.RHS):
- tmp2 += str(self.RHS[r]) + "\n\t"
- return tmp + tmp2 + "\n"
-
-
-[docs]class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
- """
- Contextual Multi-Seasonal Fuzzy Time Series
- """
- def __init__(self, **kwargs):
- super(ContextualMultiSeasonalFTS, self).__init__(**kwargs)
- self.name = "Contextual Multi Seasonal FTS"
- self.shortname = "CMSFTS "
- self.detail = ""
- self.seasonality = 1
- self.has_seasonality = True
- self.has_point_forecasting = True
- self.is_high_order = True
- self.is_multivariate = True
- self.order = 1
- self.flrgs = {}
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
-
- if str(flr.index) not in self.flrgs:
- self.flrgs[str(flr.index)] = ContextualSeasonalFLRG(flr.index)
-
- self.flrgs[str(flr.index)].append_rhs(flr)
-
-[docs] def train(self, data, **kwargs):
- if kwargs.get('sets', None) is not None:
- self.sets = kwargs.get('sets', None)
- if kwargs.get('parameters', None) is not None:
- self.seasonality = kwargs.get('parameters', None)
- flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data,
- transformation=self.partitioner.transformation,
- alpha_cut=self.alpha_cut)
- self.generate_flrg(flrs)
-
-[docs] def get_midpoints(self, flrg, data):
- ret = []
- for d in data:
- if d in flrg.RHS:
- ret.extend([self.sets[s].centroid for s in flrg.RHS[d].RHS])
- else:
- ret.extend([self.sets[d].centroid])
-
- return np.array(ret)
-
-[docs] def forecast(self, data, **kwargs):
- ordered_sets = FuzzySet.set_ordered(self.sets)
-
- ret = []
-
- index = self.indexer.get_season_of_data(data)
- ndata = self.indexer.get_data(data)
-
- for k in np.arange(0, len(data)):
-
- if str(index[k]) in self.flrgs:
-
- flrg = self.flrgs[str(index[k])]
-
- d = FuzzySet.get_fuzzysets(ndata[k], self.sets, ordered_sets, alpha_cut=self.alpha_cut)
-
- mp = self.get_midpoints(flrg, d)
-
- ret.append(sum(mp) / len(mp))
- else:
- ret.append(np.nan)
-
- return ret
-
-[docs] def forecast_ahead(self, data, steps, **kwargs):
- ret = []
- for i in steps:
- flrg = self.flrgs[str(i)]
-
-            mp = self.get_midpoints(flrg, sorted(flrg.RHS.keys()))  # use every fuzzy set observed for this season
-
- ret.append(sum(mp) / len(mp))
-
- return ret
-
-
-
Source code for pyFTS.models.seasonal.common
-import numpy as np
-import pandas as pd
-from enum import Enum
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner, Grid
-from datetime import date as dt, datetime as dtm
-
-
-[docs]class DateTime(Enum):
- """
- Date and time granularities used for seasonality identification
- """
- year = 1
- half = 2 # six months
- third = 3 # four months
- quarter = 4 # three months
- sixth = 6 # two months
- month = 12
- day_of_month = 30
- day_of_year = 364
- day_of_week = 7
- hour = 24
- minute = 60
- second = 60
- hour_of_day = 24
- hour_of_week = 168
- hour_of_month = 744
- hour_of_year = 8736
- minute_of_hour = 60
- minute_of_day = 1440
- minute_of_week = 10080
- minute_of_month = 44640
- minute_of_year = 524160
- second_of_minute = 60.00001
- second_of_hour = 3600
- second_of_day = 86400
-
-
-[docs]def strip_datepart(date, date_part, mask=''):
- if isinstance(date, str):
- date = dtm.strptime(date, mask)
- if date_part == DateTime.year:
- tmp = date.year
- elif date_part == DateTime.month:
- tmp = date.month
- elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
- tmp = (date.month // date_part.value) + 1
- elif date_part == DateTime.day_of_year:
- tmp = date.timetuple().tm_yday
- elif date_part == DateTime.day_of_month:
- tmp = date.day
- elif date_part == DateTime.day_of_week:
- tmp = date.weekday()
- elif date_part == DateTime.hour or date_part == DateTime.hour_of_day:
- tmp = date.hour
- elif date_part == DateTime.hour_of_week:
- wk = (date.weekday()-1) * 24
- tmp = date.hour + wk
- elif date_part == DateTime.hour_of_month:
- wk = (date.day-1) * 24
- tmp = date.hour + wk
- elif date_part == DateTime.hour_of_year:
- wk = (date.timetuple().tm_yday-1) * 24
- tmp = date.hour + wk
- elif date_part == DateTime.minute or date_part == DateTime.minute_of_hour:
- tmp = date.minute
- elif date_part == DateTime.minute_of_day:
- wk = date.hour * 60
- tmp = date.minute + wk
- elif date_part == DateTime.minute_of_week:
- wk1 = (date.weekday()-1) * 1440 #24 * 60
- wk2 = date.hour * 60
- tmp = date.minute + wk1 + wk2
- elif date_part == DateTime.minute_of_month:
- wk1 = (date.day - 1) * 1440 #24 * 60
- wk2 = date.hour * 60
- tmp = date.minute + wk1 + wk2
- elif date_part == DateTime.minute_of_year:
- wk1 = (date.timetuple().tm_yday - 1) * 1440 #24 * 60
- wk2 = date.hour * 60
- tmp = date.minute + wk1 + wk2
- elif date_part == DateTime.second or date_part == DateTime.second_of_minute:
- tmp = date.second
- elif date_part == DateTime.second_of_hour:
- wk1 = date.minute * 60
- tmp = date.second + wk1
- elif date_part == DateTime.second_of_day:
- wk1 = date.hour * 3600 #60 * 60
- wk2 = date.minute * 60
- tmp = date.second + wk1 + wk2
- else:
- raise Exception("Unknown DateTime value!")
-
- return tmp
-
-
-[docs]class FuzzySet(FuzzySet.FuzzySet):
- """
- Temporal/Seasonal Fuzzy Set
- """
-
- def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0, **kwargs):
- super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha,
- **kwargs)
- self.datepart = datepart
- self.type = kwargs.get('type', 'seasonal')
-
-[docs] def transform(self, x):
- if self.type == 'seasonal' and isinstance(x, (dt, pd.Timestamp)):
- dp = strip_datepart(x, self.datepart)
- else:
- dp = x
-
- return dp
-
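As a quick illustration of strip_datepart above, this sketch shows the values returned for a fixed timestamp (the date is arbitrary; 2020-05-17 falls on a Sunday):

    from datetime import datetime
    from pyFTS.models.seasonal.common import DateTime, strip_datepart

    d = datetime(2020, 5, 17, 14, 30, 10)       # a Sunday
    strip_datepart(d, DateTime.month)            # -> 5
    strip_datepart(d, DateTime.day_of_week)      # -> 6   (Monday == 0)
    strip_datepart(d, DateTime.hour_of_day)      # -> 14
    strip_datepart(d, DateTime.minute_of_day)    # -> 870 (14*60 + 30)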
Source code for pyFTS.models.seasonal.msfts
-import numpy as np
-from pyFTS.common import FLR
-from pyFTS.models.seasonal import sfts
-
-
-[docs]class MultiSeasonalFTS(sfts.SeasonalFTS):
- """
- Multi-Seasonal Fuzzy Time Series
- """
- def __init__(self, name, indexer, **kwargs):
-        super(MultiSeasonalFTS, self).__init__(**kwargs)
- self.name = "Multi Seasonal FTS"
- self.shortname = "MSFTS " + name
- self.detail = ""
- self.seasonality = 1
- self.has_seasonality = True
- self.has_point_forecasting = True
- self.is_high_order = False
- self.is_multivariate = True
- self.indexer = indexer
- self.flrgs = {}
-
-[docs] def generate_flrg(self, flrs):
- for flr in flrs:
-
- if str(flr.index) not in self.flrgs:
- self.flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index)
-
- self.flrgs[str(flr.index)].append_rhs(flr.RHS)
-
-[docs] def train(self, data, **kwargs):
- if kwargs.get('sets', None) is not None:
- self.sets = kwargs.get('sets', None)
- if kwargs.get('parameters', None) is not None:
- self.seasonality = kwargs.get('parameters', None)
- #ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data)))
- flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data)
- self.generate_flrg(flrs)
-
-[docs] def forecast(self, data, **kwargs):
-
- ret = []
-
- index = self.indexer.get_season_of_data(data)
- ndata = self.indexer.get_data(data)
-
- for k in np.arange(0, len(index)):
-
- flrg = self.flrgs[str(index[k])]
-
-            mp = self.get_midpoints(flrg)
-
- ret.append(sum(mp) / len(mp))
-
- return ret
-
-[docs] def forecast_ahead(self, data, steps, **kwargs):
- ret = []
- for i in steps:
- flrg = self.flrgs[str(i)]
-
-            mp = self.get_midpoints(flrg)
-
- ret.append(sum(mp) / len(mp))
-
- return ret
-
Source code for pyFTS.models.seasonal.partitioner
-from pyFTS.common import Membership, FuzzySet as FS
-from pyFTS.common.Composite import FuzzySet as Composite
-from pyFTS.partitioners import partitioner, Grid
-from pyFTS.models.seasonal.common import DateTime, FuzzySet, strip_datepart
-import numpy as np
-import matplotlib.pylab as plt
-from scipy.spatial import KDTree
-
-
-[docs]class TimeGridPartitioner(partitioner.Partitioner):
- """Even Length DateTime Grid Partitioner"""
-
- def __init__(self, **kwargs):
- """
- Even Length Grid Partitioner
- :param seasonality: Time granularity, from pyFTS.models.seasonal.common.DateTime
- :param data: Training data from which the universe of discourse will be extracted. The universe of discourse is the open interval between the minimum and maximum values of the training data.
- :param npart: The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created
- :param func: Fuzzy membership function (pyFTS.common.Membership)
- """
- super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs)
-
- self.season = kwargs.get('seasonality', DateTime.day_of_year)
- '''Seasonality, a pyFTS.models.seasonal.common.DateTime object'''
- self.mask = kwargs.get('mask', '%Y-%m-%d %H:%M:%S')
- '''A string with the datetime formatting mask'''
-
- data = kwargs.get('data', None)
- if self.season == DateTime.year:
- ndata = [strip_datepart(k, self.season) for k in data]
- self.min = min(ndata)
- self.max = max(ndata)
- else:
- tmp = (self.season.value / self.partitions) / 2
- self.min = tmp
- self.max = self.season.value + tmp
-
- self.type = kwargs.get('type','seasonal')
-
- self.sets = self.build(None)
-
- if self.ordered_sets is None and self.setnames is not None:
- self.ordered_sets = self.setnames
- else:
- self.ordered_sets = FS.set_ordered(self.sets)
-
- if self.type == 'seasonal':
- self.extractor = lambda x: strip_datepart(x, self.season, self.mask)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'variable': self.variable, 'type': self.type }
-
- if self.season == DateTime.year:
- dlen = (self.max - self.min)
- partlen = dlen / self.partitions
- elif self.season == DateTime.day_of_week:
- self.min, self.max, partlen, pl2 = 0, 7, 1, 1
- elif self.season == DateTime.minute:
- self.min, self.max, partlen, pl2 = 0, 60, 1, 1
- elif self.season == DateTime.hour:
- self.min, self.max, partlen, pl2 = 0, 24, 1, 1
- elif self.season == DateTime.month:
- self.min, self.max, partlen, pl2 = 1, 13, 1, 1
- elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
- self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1
- else:
- partlen = self.season.value / self.partitions
- pl2 = partlen / 2
-
- count = 0
- for c in np.arange(self.min, self.max, partlen):
- set_name = self.get_name(count)
- if self.membership_function == Membership.trimf:
- if c == self.min:
- tmp = Composite(set_name, superset=True, **kwargs)
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
- [self.season.value - pl2, self.season.value,
- self.season.value + pl2], self.season.value, alpha=1,
- **kwargs))
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
- [c - partlen, c, c + partlen], c,
- **kwargs))
- tmp.centroid = c
- sets[set_name] = tmp
- elif c == self.max - partlen:
- tmp = Composite(set_name, superset=True, **kwargs)
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
- [-pl2, 0.0,
- pl2], 0.0, alpha=1,
- **kwargs))
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
- [c - partlen, c, c + partlen], c,
- **kwargs))
- tmp.centroid = c
- sets[set_name] = tmp
- else:
- sets[set_name] = FuzzySet(self.season, set_name, Membership.trimf,
- [c - partlen, c, c + partlen], c,
- **kwargs)
- elif self.membership_function == Membership.gaussmf:
- sets[set_name] = FuzzySet(self.season, set_name, Membership.gaussmf, [c, partlen / 3], c,
- **kwargs)
- elif self.membership_function == Membership.trapmf:
- q = partlen / 4
- if c == self.min:
- tmp = Composite(set_name, superset=True)
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
- [self.season.value - pl2, self.season.value,
- self.season.value + 0.0000001], 0,
- **kwargs))
- tmp.append_set(FuzzySet(self.season, set_name, Membership.trapmf,
- [c - partlen, c - q, c + q, c + partlen], c,
- **kwargs))
- tmp.centroid = c
- sets[set_name] = tmp
- else:
- sets[set_name] = FuzzySet(self.season, set_name, Membership.trapmf,
- [c - partlen, c - q, c + q, c + partlen], c,
- **kwargs)
- count += 1
-
- self.min = 0
-
- return sets
-
-[docs] def build_index(self):
- points = []
-
- fset = self.sets[self.ordered_sets[0]]
- points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
-
- for ct, key in enumerate(self.ordered_sets[1:-1]):
- fset = self.sets[key]
- points.append([fset.lower, fset.centroid, fset.upper])
-
- fset = self.sets[self.ordered_sets[-1]]
- points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
-
- import sys
- sys.setrecursionlimit(100000)
-
- self.kdtree = KDTree(points)
-
- sys.setrecursionlimit(1000)
-
-[docs] def search(self, data, **kwargs):
- '''
- Perform a search for the fuzzy sets nearest to the point 'data'. This function was designed to work with several
- overlapping fuzzy sets.
-
- :param data: the value to search for the nearest fuzzy sets
- :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
- :param results: the number of nearest fuzzy sets to return
- :return: a list with the nearest fuzzy sets
- '''
-
- type = kwargs.get('type','index')
- results = kwargs.get('results',3)
-
- if self.kdtree is None:
- self.build_index()
-
- _, ix = self.kdtree.query([data, data, data], results)
-
- ix = ix.tolist()
-
- if 0 in ix:
- ix.insert(0, self.partitions-1)
- elif self.partitions-1 in ix:
- ix.insert(0, 0)
-
- if type == 'name':
- return [self.ordered_sets[k] for k in sorted(ix)]
- else:
- return sorted(ix)
-
-
-[docs] def plot(self, ax):
- """
- Plot the seasonal partitioning on the given Matplotlib axis.
-
- :param ax: Matplotlib axis
- """
- ax.set_title(self.name)
- ax.set_ylim([0, 1])
- ax.set_xlim([0, self.season.value])
- ticks = []
- x = []
- for key in self.sets.keys():
- s = self.sets[key]
- if s.type == 'composite':
- for ss in s.sets:
- self.plot_set(ax, ss)
- else:
- self.plot_set(ax, s)
- ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
- x.append(s.centroid)
- ax.xaxis.set_ticklabels(ticks)
- ax.xaxis.set_ticks(x)
-
Source code for pyFTS.models.seasonal.sfts
-"""
-Simple First Order Seasonal Fuzzy Time Series implementation of Song (1999), based on the Conventional FTS of Chen (1996)
-
-Q. Song, “Seasonal forecasting in fuzzy time series,” Fuzzy sets Syst., vol. 107, pp. 235–236, 1999.
-
-S.-M. Chen, “Forecasting enrollments based on fuzzy time series,” Fuzzy Sets Syst., vol. 81, no. 3, pp. 311–319, 1996.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, flrg, fts
-
-
-[docs]class SeasonalFLRG(flrg.FLRG):
- """First Order Seasonal Fuzzy Logical Relationship Group"""
- def __init__(self, seasonality):
- super(SeasonalFLRG, self).__init__(1)
- self.LHS = seasonality
- self.RHS = []
-
-[docs] def append_rhs(self, c, **kwargs):
-        self.RHS.append(c)
-
- def __str__(self):
- tmp = str(self.LHS) + " -> "
- tmp2 = ""
- for c in sorted(self.RHS, key=lambda s: str(s)):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + str(c)
- return tmp + tmp2
-
- def __len__(self):
- return len(self.RHS)
-
-
-[docs]class SeasonalFTS(fts.FTS):
- """First Order Seasonal Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(SeasonalFTS, self).__init__(**kwargs)
- self.name = "Seasonal FTS"
- self.shortname = "SFTS"
- self.order = 1
- self.seasonality = 1
- self.has_seasonality = True
- self.has_point_forecasting = True
- self.is_high_order = False
- self.flrgs = {}
-
-[docs] def generate_flrg(self, flrs):
-
- for ct, flr in enumerate(flrs, start=1):
-
- season = self.indexer.get_season_by_index(ct)[0]
-
- ss = str(season)
-
- if ss not in self.flrgs:
- self.flrgs[ss] = SeasonalFLRG(season)
-
- #print(season)
- self.flrgs[ss].append_rhs(flr.RHS)
-
-[docs] def get_midpoints(self, flrg):
- ret = np.array([self.sets[s].centroid for s in flrg.RHS])
- return ret
-
-[docs] def train(self, data, **kwargs):
- if kwargs.get('sets', None) is not None:
- self.sets = kwargs.get('sets', None)
- tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
- flrs = FLR.generate_non_recurrent_flrs(tmpdata)
- self.generate_flrg(flrs)
-
-[docs] def forecast(self, data, **kwargs):
-
- l = len(data)
-
- ret = []
-
- for k in np.arange(0, l):
-
- season = self.indexer.get_season_by_index(k)[0]
-
- flrg = self.flrgs[str(season)]
-
- mp = self.get_midpoints(flrg)
-
- ret.append(np.percentile(mp, 50))
-
- return ret
-
- def __str__(self):
- """String representation of the model"""
-
- tmp = self.name + ":\n"
- for r in self.flrgs:
- tmp = tmp + str(self.flrgs[r]) + "\n"
- return tmp
-
Source code for pyFTS.models.song
-"""
-First Order Traditional Fuzzy Time Series method by Song & Chissom (1993)
-
-Q. Song and B. S. Chissom, “Fuzzy time series and its models,” Fuzzy Sets Syst., vol. 54, no. 3, pp. 269–277, 1993.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts
-
-
-[docs]class ConventionalFTS(fts.FTS):
- """Traditional Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(ConventionalFTS, self).__init__(order=1, name="FTS", **kwargs)
- self.name = "Traditional FTS"
- self.detail = "Song & Chissom"
- if self.sets is not None and self.partitioner is not None:
- self.sets = self.partitioner.sets
-
- self.R = None
-
- if self.sets is not None:
- l = len(self.sets)
- self.R = np.zeros((l,l))
-
-[docs] def flr_membership_matrix(self, flr):
- ordered_set = FuzzySet.set_ordered(self.sets)
- centroids = [self.sets[k].centroid for k in ordered_set]
- lm = [self.sets[flr.LHS].membership(k) for k in centroids]
- rm = [self.sets[flr.RHS].membership(k) for k in centroids]
-
- l = len(ordered_set)
- r = np.zeros((l, l))
-        for i in range(0, l):
-            for j in range(0, l):
-                r[i][j] = min(lm[i], rm[j])
-
- return r
-
-[docs] def operation_matrix(self, flrs):
- l = len(self.sets)
- if self.R is None or len(self.R) == 0 :
- self.R = np.zeros((l, l))
-        for flr in flrs:
-            mm = self.flr_membership_matrix(flr)
-            for i in range(0, l):
-                for j in range(0, l):
-                    self.R[i][j] = max(self.R[i][j], mm[i][j])
-
-
-[docs] def train(self, data, **kwargs):
-
- tmpdata = self.partitioner.fuzzyfy(data, method='maximum', mode='sets')
- flrs = FLR.generate_non_recurrent_flrs(tmpdata)
- self.operation_matrix(flrs)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- if self.partitioner is not None:
- ordered_sets = self.partitioner.ordered_sets
- else:
- ordered_sets = FuzzySet.set_ordered(self.sets)
-
- l = len(ndata)
- npart = len(self.sets)
-
- ret = []
-
- for k in np.arange(0, l):
- mv = FuzzySet.fuzzyfy_instance(ndata[k], self.sets)
-
- r = [max([ min(self.R[i][j], mv[j]) for j in np.arange(0,npart) ]) for i in np.arange(0,npart)]
-
- fs = np.ravel(np.argwhere(r == max(r)))
-
- if len(fs) == 1:
- ret.append(self.sets[ordered_sets[fs[0]]].centroid)
- else:
- mp = [self.sets[ordered_sets[s]].centroid for s in fs]
-
- ret.append( sum(mp)/len(mp))
-
- return ret
-
- def __str__(self):
- tmp = self.name + ":\n"
- return tmp + str(self.R)
-
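A minimal usage sketch for ConventionalFTS, assuming the standard fit() wrapper from pyFTS.common.fts.FTS (it calls the train() method shown above); the series is synthetic and only for illustration:

    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.models import song

    data = np.random.normal(50, 10, 1000)
    train, test = data[:800], data[800:]

    part = Grid.GridPartitioner(data=train, npart=10)
    model = song.ConventionalFTS(partitioner=part)
    model.fit(train)                    # wraps the train() method shown above
    print(model.forecast(test[:10]))    # point forecasts from the max-min relation matrix R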
Source code for pyFTS.models.yu
-"""
-First Order Weighted Fuzzy Time Series by Yu(2005)
-
-H.-K. Yu, “Weighted fuzzy time series models for TAIEX forecasting,”
-Phys. A Stat. Mech. its Appl., vol. 349, no. 3, pp. 609–624, 2005.
-"""
-
-import numpy as np
-from pyFTS.common import FuzzySet, FLR, fts, flrg
-from pyFTS.models import chen
-
-
-[docs]class WeightedFLRG(flrg.FLRG):
- """First Order Weighted Fuzzy Logical Relationship Group"""
- def __init__(self, LHS, **kwargs):
- super(WeightedFLRG, self).__init__(1, **kwargs)
- self.LHS = LHS
- self.RHS = []
- self.count = 1.0
- self.w = None
-
-[docs] def append_rhs(self, c, **kwargs):
- count = kwargs.get('count', 1.0)
- self.RHS.append(c)
- self.count += count
-
-[docs] def weights(self, sets):
- if self.w is None:
- tot = sum(np.arange(1.0, self.count, 1.0))
- self.w = np.array([k / tot for k in np.arange(1.0, self.count, 1.0)])
- return self.w
-
- def __str__(self):
- tmp = self.LHS + " -> "
- tmp2 = ""
- cc = 1.0
- tot = sum(np.arange(1.0, self.count, 1.0))
- for c in sorted(self.RHS):
- if len(tmp2) > 0:
- tmp2 = tmp2 + ","
- tmp2 = tmp2 + c + "(" + str(round(cc / tot, 3)) + ")"
- cc = cc + 1.0
- return tmp + tmp2
-
-
-[docs]class WeightedFTS(fts.FTS):
- """First Order Weighted Fuzzy Time Series"""
- def __init__(self, **kwargs):
- super(WeightedFTS, self).__init__(order=1, name="WFTS", **kwargs)
- self.name = "Weighted FTS"
- self.detail = "Yu"
-
-[docs] def generate_FLRG(self, flrs):
- for flr in flrs:
- if flr.LHS in self.flrgs:
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
- else:
-            self.flrgs[flr.LHS] = WeightedFLRG(flr.LHS)
- self.flrgs[flr.LHS].append_rhs(flr.RHS)
-
-[docs] def train(self, ndata, **kwargs):
- tmpdata = self.partitioner.fuzzyfy(ndata, method='maximum', mode='sets')
- flrs = FLR.generate_recurrent_flrs(tmpdata)
- self.generate_FLRG(flrs)
-
-[docs] def forecast(self, ndata, **kwargs):
-
- explain = kwargs.get('explain', False)
-
- if self.partitioner is not None:
- ordered_sets = self.partitioner.ordered_sets
- else:
- ordered_sets = FuzzySet.set_ordered(self.sets)
-
- ndata = np.array(ndata)
-
- l = len(ndata) if not explain else 1
-
- ret = []
-
- for k in np.arange(0, l):
-
- actual = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets, ordered_sets)
-
- if explain:
- print("Fuzzyfication:\n\n {} -> {} \n\n".format(ndata[k], actual.name))
-
- if actual.name not in self.flrgs:
- ret.append(actual.centroid)
-
- if explain:
- print("Rules:\n\n {} -> {} (Naïve)\t Midpoint: {} \n\n".format(actual.name, actual.name,actual.centroid))
-
- else:
- flrg = self.flrgs[actual.name]
- mp = flrg.get_midpoints(self.sets)
-
- final = mp.dot(flrg.weights(self.sets))
-
- ret.append(final)
-
- if explain:
- print("Rules:\n\n {} \n\n ".format(str(flrg)))
- print("Midpoints: \n\n {}\n\n".format(mp))
-
- print("Deffuzyfied value: {} \n".format(final))
-
- return ret
-
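The recency weighting of WeightedFLRG.weights() can be checked by hand: for a group with three right-hand-side sets, count reaches 4.0 and the weights are arange(1, 4) normalized by their sum, so the most recent transition receives the largest weight. A small sketch reproducing that arithmetic:

    import numpy as np

    # weights for a FLRG with three RHS entries (self.count == 4.0)
    w = np.arange(1.0, 4.0) / sum(np.arange(1.0, 4.0))
    print(w)    # [0.1666..., 0.3333..., 0.5]; the forecast is the dot product of the RHS midpoints and w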
Source code for pyFTS.partitioners.CMeans
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]def distance(x, y):
- if isinstance(x, list):
- tmp = functools.reduce(operator.add, [(x[k] - y[k]) ** 2 for k in range(0, len(x))])
- else:
- tmp = (x - y) ** 2
- return math.sqrt(tmp)
-
-
-[docs]def c_means(k, dados, tam):
- # Initialize the centroids by picking random elements from the dataset
- centroides = [dados[rnd.randint(0, len(dados)-1)] for kk in range(0, k)]
-
- grupos = [-1 for x in range(0, len(dados))]
-
- it_semmodificacao = 0
-
- # for each instance
- iteracoes = 0
- while iteracoes < 1000 and it_semmodificacao < 10:
- inst_count = 0
-
- modificacao = False
-
- for instancia in dados:
-
- # compute the distance from the instance to each centroid
- grupo_count = 0
- dist = 10000
-
- grupotmp = grupos[inst_count]
-
- for grupo in centroides:
- tmp = distance(instancia, grupo)
- if tmp < dist:
- dist = tmp
- # assign the instance to the closest centroid
- grupos[inst_count] = grupo_count
- grupo_count = grupo_count + 1
-
- if grupotmp != grupos[inst_count]:
- modificacao = True
-
- inst_count = inst_count + 1
-
- if not modificacao:
- it_semmodificacao = it_semmodificacao + 1
- else:
- it_semmodificacao = 0
-
- # update each centroid with the mean of all instances assigned to it
- grupo_count = 0
- for grupo in centroides:
- total_inst = functools.reduce(operator.add, [1 for xx in grupos if xx == grupo_count], 0)
- if total_inst > 0:
- if tam > 1:
- for count in range(0, tam):
- soma = functools.reduce(operator.add,
- [dados[kk][count] for kk in range(0, len(dados)) if
- grupos[kk] == grupo_count])
- centroides[grupo_count][count] = soma / total_inst
- else:
- soma = functools.reduce(operator.add,
- [dados[kk] for kk in range(0, len(dados)) if grupos[kk] == grupo_count])
- centroides[grupo_count] = soma / total_inst
- grupo_count = grupo_count + 1
-
- iteracoes = iteracoes + 1
-
- return centroides
-
-
-[docs]class CMeansPartitioner(partitioner.Partitioner):
- def __init__(self, **kwargs):
- super(CMeansPartitioner, self).__init__(name="CMeans", **kwargs)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- centroides = c_means(self.partitions, data, 1)
- centroides.append(self.max)
- centroides.append(self.min)
- centroides = list(set(centroides))
- centroides.sort()
- for c in np.arange(1, len(centroides) - 1):
- _name = self.get_name(c)
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
- [round(centroides[c - 1], 3), round(centroides[c], 3), round(centroides[c + 1], 3)],
- round(centroides[c], 3), **kwargs)
-
- return sets
-
Source code for pyFTS.partitioners.Entropy
-"""
-C. H. Cheng, R. J. Chang, and C. A. Yeh, “Entropy-based and trapezoidal fuzzification-based fuzzy time series approach for forecasting IT project cost,”
-Technol. Forecast. Social Change, vol. 73, no. 5, pp. 524–542, Jun. 2006.
-"""
-
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]def splitBelow(data, threshold):
-    return [k for k in data if k <= threshold]
-
-
-[docs]def splitAbove(data, threshold):
-    return [k for k in data if k > threshold]
-
-[docs]def PMF(data, threshold):
- a = sum([1.0 for k in splitBelow(data,threshold)])
- b = sum([1.0 for k in splitAbove(data, threshold)])
- l = len(data)
- return [a / l, b / l]
-
-
-[docs]def entropy(data, threshold):
- pmf = PMF(data, threshold)
- if pmf[0] == 0 or pmf[1] == 0:
- return 1
- else:
- return - sum([pmf[0] * math.log(pmf[0]), pmf[1] * math.log(pmf[1])])
-
-
-[docs]def informationGain(data, thres1, thres2):
- return entropy(data, thres1) - entropy(data, thres2)
-
-
-[docs]def bestSplit(data, npart):
- if len(data) < 2:
- return []
- count = 1
- ndata = list(set(np.array(data).flatten()))
- ndata.sort()
- l = len(ndata)
- threshold = 0
- try:
- while count < l and informationGain(data, ndata[count - 1], ndata[count]) <= 0:
- threshold = ndata[count]
- count += 1
- except IndexError:
- print(threshold)
- print (ndata)
- print (count)
-
- rem = npart % 2
-
- if (npart - rem)/2 > 1:
- p1 = splitBelow(data,threshold)
- p2 = splitAbove(data,threshold)
-
- if len(p1) > len(p2):
- np1 = (npart - rem)/2 + rem
- np2 = (npart - rem)/2
- else:
- np1 = (npart - rem) / 2
- np2 = (npart - rem) / 2 + rem
-
- tmp = [threshold]
-
- for k in bestSplit(p1, np1 ): tmp.append(k)
- for k in bestSplit(p2, np2 ): tmp.append(k)
-
- return tmp
-
- else:
- return [threshold]
-
-
-[docs]class EntropyPartitioner(partitioner.Partitioner):
- """Huarng Entropy Partitioner"""
- def __init__(self, **kwargs):
- super(EntropyPartitioner, self).__init__(name="Entropy", **kwargs)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- partitions = bestSplit(data, self.partitions)
- partitions.append(self.min)
- partitions.append(self.max)
- partitions = list(set(partitions))
- partitions.sort()
- for c in np.arange(1, len(partitions)-1):
- _name = self.get_name(c-1)
- if self.membership_function == Membership.trimf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
- [partitions[c - 1], partitions[c], partitions[c + 1]],partitions[c], **kwargs)
- elif self.membership_function == Membership.trapmf:
- b1 = (partitions[c] - partitions[c - 1])/2
- b2 = (partitions[c + 1] - partitions[c]) / 2
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf,
- [partitions[c - 1], partitions[c] - b1,
- partitions[c] + b2, partitions[c + 1]],
- partitions[c], **kwargs)
-
- return sets
-
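A tiny worked example of the helpers above (values assume splitBelow keeps items less than or equal to the threshold and splitAbove keeps the rest, as defined earlier on this page):

    from pyFTS.partitioners import Entropy

    data = [1, 2, 3, 4]
    print(Entropy.PMF(data, 2))        # [0.5, 0.5]: two values <= 2, two values > 2
    print(Entropy.entropy(data, 2))    # 0.693... (= ln 2), the maximum for a binary split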
Source code for pyFTS.partitioners.FCM
-"""
-S. T. Li, Y. C. Cheng, and S. Y. Lin, “A FCM-based deterministic forecasting model for fuzzy time series,”
-Comput. Math. Appl., vol. 56, no. 12, pp. 3052–3063, Dec. 2008. DOI: 10.1016/j.camwa.2008.07.033.
-"""
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]def fuzzy_distance(x, y):
- if isinstance(x, list):
- tmp = functools.reduce(operator.add, [(x[k] - y[k]) ** 2 for k in range(0, len(x))])
- else:
- tmp = (x - y) ** 2
- return math.sqrt(tmp)
-
-
-[docs]def membership(val, vals):
- soma = 0
- for k in vals:
- if k == 0:
- k = 1
- soma = soma + (val / k) ** 2
-
- return soma
-
-
-[docs]def fuzzy_cmeans(k, dados, tam, m, deltadist=0.001):
- tam_dados = len(dados)
-
- # Initialize the centroids by picking random elements from the dataset
- centroides = [dados[rnd.randint(0, tam_dados - 1)] for kk in range(0, k)]
-
- # Membership table of the instances to the groups
- grupos = [[0 for kk in range(0, k)] for xx in range(0, tam_dados)]
-
- alteracaomedia = 1000
-
- m_exp = 1 / (m - 1)
-
- # for each instance
- iteracoes = 0
-
- while iteracoes < 1000 and alteracaomedia > deltadist:
-
- alteracaomedia = 0
-
- # compute the distance to each centroid
- # and update the membership of this instance to each group
-
- inst_count = 0
- for instancia in dados:
-
- dist_grupos = [0 for xx in range(0, k)]
-
- grupo_count = 0
- for grupo in centroides:
- dist_grupos[grupo_count] = fuzzy_distance(grupo, instancia)
- grupo_count = grupo_count + 1
-
- dist_grupos_total = functools.reduce(operator.add, [xk for xk in dist_grupos])
-
- for grp in range(0, k):
- if dist_grupos[grp] == 0:
- grupos[inst_count][grp] = 1
- else:
- grupos[inst_count][grp] = 1 / membership(dist_grupos[grp], dist_grupos)
- # grupos[inst_count][grp] = 1/(dist_grupos[grp] / dist_grupos_total)
- # grupos[inst_count][grp] = (1/(dist_grupos[grp]**2))**m_exp / (1/(dist_grupos_total**2))**m_exp
-
- inst_count = inst_count + 1
-
- # return centroides
-
- # update each centroid with the mean of all patterns weighted by their membership degrees
-
- grupo_count = 0
- for grupo in centroides:
- if tam > 1:
- oldgrp = [xx for xx in grupo]
- for atr in range(0, tam):
- soma = functools.reduce(operator.add,
- [grupos[xk][grupo_count] * dados[xk][atr] for xk in range(0, tam_dados)])
- norm = functools.reduce(operator.add, [grupos[xk][grupo_count] for xk in range(0, tam_dados)])
- centroides[grupo_count][atr] = soma / norm
- else:
- oldgrp = grupo
- soma = functools.reduce(operator.add,
- [grupos[xk][grupo_count] * dados[xk] for xk in range(0, tam_dados)])
- norm = functools.reduce(operator.add, [grupos[xk][grupo_count] for xk in range(0, tam_dados)])
- centroides[grupo_count] = soma / norm
-
- alteracaomedia = alteracaomedia + fuzzy_distance(oldgrp, grupo)
- grupo_count = grupo_count + 1
-
- alteracaomedia = alteracaomedia / k
- iteracoes = iteracoes + 1
-
- return centroides
-
-
-[docs]class FCMPartitioner(partitioner.Partitioner):
- """
-
- """
-
- def __init__(self, **kwargs):
- super(FCMPartitioner, self).__init__(name="FCM", **kwargs)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- centroids = fuzzy_cmeans(self.partitions, data, 1, 2)
- centroids.append(self.max)
- centroids.append(self.min)
- centroids = list(set(centroids))
- centroids.sort()
- for c in np.arange(1, len(centroids) - 1):
- _name = self.get_name(c)
- if self.membership_function == Membership.trimf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
- [round(centroids[c - 1], 3), round(centroids[c], 3),
- round(centroids[c + 1], 3)],
- round(centroids[c], 3), **kwargs)
- elif self.membership_function == Membership.trapmf:
- q1 = (round(centroids[c], 3) - round(centroids[c - 1], 3)) / 2
- q2 = (round(centroids[c + 1], 3) - round(centroids[c], 3)) / 2
-                sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf,
- [round(centroids[c - 1], 3), round(centroids[c], 3) - q1,
- round(centroids[c], 3) + q2, round(centroids[c + 1], 3)],
- round(centroids[c], 3), **kwargs)
-
- return sets
-
Source code for pyFTS.partitioners.Grid
-"""Even Length Grid Partitioner"""
-
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]class GridPartitioner(partitioner.Partitioner):
- """Even Length Grid Partitioner"""
-
- def __init__(self, **kwargs):
- """
- Even Length Grid Partitioner
- """
- super(GridPartitioner, self).__init__(name="Grid", **kwargs)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- dlen = self.max - self.min
- partlen = dlen / self.partitions
-
- count = 0
- for c in np.arange(self.min, self.max, partlen):
- _name = self.get_name(count)
- if self.membership_function == Membership.trimf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf, [c - partlen, c, c + partlen],c,**kwargs)
- elif self.membership_function == Membership.gaussmf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.gaussmf, [c, partlen / 3], c,**kwargs)
- elif self.membership_function == Membership.trapmf:
- q = partlen / 2
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c,**kwargs)
- count += 1
-
- self.min = self.min - partlen
-
- return sets
-
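A minimal usage sketch for GridPartitioner (synthetic data, for illustration only); note that the base Partitioner pads the universe of discourse by roughly 10% before build() is called:

    import numpy as np
    from pyFTS.partitioners import Grid

    data = np.random.normal(50, 10, 1000)
    part = Grid.GridPartitioner(data=data, npart=10)

    print(len(part))    # 10 evenly spaced triangular fuzzy sets
    print(part)         # names and parameters of each set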
Source code for pyFTS.partitioners.Huarng
-"""
-K. H. Huarng, “Effective lengths of intervals to improve forecasting in fuzzy time series,”
-Fuzzy Sets Syst., vol. 123, no. 3, pp. 387–394, Nov. 2001.
-"""
-
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership, Transformations
-
-from pyFTS.partitioners import partitioner
-
-[docs]class HuarngPartitioner(partitioner.Partitioner):
- """Huarng Empirical Partitioner"""
- def __init__(self, **kwargs):
- super(HuarngPartitioner, self).__init__(name="Huarng", **kwargs)
-
-[docs] def build(self, data):
- diff = Transformations.Differential(1)
- data2 = diff.apply(data)
- davg = np.abs( np.mean(data2) / 2 )
-
- if davg <= 1.0:
- base = 0.1
- elif 1 < davg <= 10:
- base = 1.0
- elif 10 < davg <= 100:
- base = 10
- else:
- base = 100
-
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- dlen = self.max - self.min
- npart = math.ceil(dlen / base)
- partition = math.ceil(self.min)
- for c in range(npart):
- _name = self.get_name(c)
- if self.membership_function == Membership.trimf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trimf,
- [partition - base, partition, partition + base], partition, **kwargs)
- elif self.membership_function == Membership.gaussmf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.gaussmf,
- [partition, base/2], partition)
- elif self.membership_function == Membership.trapmf:
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.trapmf,
- [partition - base, partition - (base/2),
- partition + (base / 2), partition + base], partition, **kwargs)
-
- partition += base
-
- return sets
-
Source code for pyFTS.partitioners.Simple
-"""Simple Partitioner for manually informed fuzzy sets"""
-
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]class SimplePartitioner(partitioner.Partitioner):
- """Simple Partitioner for manually informed fuzzy sets"""
-
- def __init__(self, **kwargs):
- """
- Simple Partitioner - the fuzzy sets are informed manually
- """
- kwargs['preprocess'] = False
-
- super(SimplePartitioner, self).__init__(name="Simple", **kwargs)
-
- self.partitions = 0
-
-[docs] def append_complex(self, fs):
- self.sets[fs.name] = fs
- self.partitions += 1
-
- self.ordered_sets = [key for key in sorted(self.sets.keys(), key=lambda k: self.sets[k].centroid)]
-
- self.min = self.sets[self.ordered_sets[0]].lower
- self.max = self.sets[self.ordered_sets[-1]].upper
-
-[docs] def append(self, name, mf, parameters, **kwargs):
- """
- Append a new partition (fuzzy set) to the partitioner
-
- :param name: Fuzzy set name
- :param mf: One of the pyFTS.common.Membership functions
- :param parameters: A list with the parameters for the membership function
- :param kwargs: Optional arguments for the fuzzy set
- """
- if name is None or len(name) == 0:
- raise ValueError("The name of the fuzzy set cannot be empty")
-
- if name in self.sets:
- raise ValueError("This name has already been used")
-
- if mf is None or mf not in (Membership.trimf, Membership.gaussmf,
- Membership.trapmf, Membership.singleton,
- Membership.sigmf):
- raise ValueError("The mf parameter should be one of pyFTS.common.Membership functions, not {}".format(mf))
-
- if mf == Membership.trimf:
- if len(parameters) != 3:
- raise ValueError("Incorrect number of parameters for the Membership.trimf")
-
- centroid = parameters[1]
- elif mf == Membership.gaussmf:
- if len(parameters) != 2:
- raise ValueError("Incorrect number of parameters for the Membership.gaussmf")
-
- centroid = parameters[0]
- elif mf == Membership.trapmf:
- if len(parameters) != 4:
- raise ValueError("Incorrect number of parameters for the Membership.trapmf")
-
- centroid = (parameters[1]+parameters[2])/2
- elif mf == Membership.singleton:
- if len(parameters) != 1:
- raise ValueError("Incorrect number of parameters for the Membership.singleton")
-
- centroid = parameters[0]
- elif mf == Membership.sigmf:
- if len(parameters) != 2:
- raise ValueError("Incorrect number of parameters for the Membership.sigmf")
-
- centroid = parameters[1] + (parameters[1] / (2 * parameters[0]))
-
- self.sets[name] = FuzzySet.FuzzySet(name, mf, parameters, centroid, **kwargs)
- self.partitions += 1
-
- self.ordered_sets = [key for key in sorted(self.sets.keys(), key=lambda k: self.sets[k].centroid)]
-
- self.min = self.sets[self.ordered_sets[0]].lower
- self.max = self.sets[self.ordered_sets[-1]].upper
-
-
-
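A minimal sketch of how fuzzy sets are informed manually with append() (set names and parameters are illustrative):

    from pyFTS.common import Membership
    from pyFTS.partitioners import Simple

    part = Simple.SimplePartitioner()
    part.append('low',  Membership.trimf, [0, 10, 20])
    part.append('mid',  Membership.trimf, [10, 20, 30])
    part.append('high', Membership.trimf, [20, 30, 40])

    print(part.fuzzyfy(12, method='maximum', mode='sets'))   # -> 'low'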
Source code for pyFTS.partitioners.Singleton
-"""Even Length Grid Partitioner"""
-
-import numpy as np
-import math
-import random as rnd
-import functools, operator
-from pyFTS.common import FuzzySet, Membership
-from pyFTS.partitioners import partitioner
-
-
-[docs]class SingletonPartitioner(partitioner.Partitioner):
- """Singleton Partitioner"""
-
- def __init__(self, **kwargs):
- """
- Singleton Partitioner
- """
- super(SingletonPartitioner, self).__init__(name="Singleton", **kwargs)
-
-[docs] def build(self, data):
- sets = {}
-
- kwargs = {'type': self.type, 'variable': self.variable}
-
- for count, instance in enumerate(data):
- _name = self.get_name(count)
- sets[_name] = FuzzySet.FuzzySet(_name, Membership.singleton, [instance], instance, **kwargs)
-
- return sets
-
Source code for pyFTS.partitioners.Util
-"""
-Facility methods for pyFTS partitioners module
-"""
-
-import numpy as np
-import pandas as pd
-import matplotlib.colors as pltcolors
-import matplotlib.pyplot as plt
-#from mpl_toolkits.mplot3d import Axes3D
-
-from pyFTS.benchmarks import Measures
-from pyFTS.common import Membership, Util
-from pyFTS.partitioners import Grid,Huarng,FCM,Entropy
-
-all_methods = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner, Huarng.HuarngPartitioner]
-
-mfs = [Membership.trimf, Membership.gaussmf, Membership.trapmf]
-
-
-[docs]def plot_sets(data, sets, titles, size=[12, 10], save=False, file=None, axis=None):
- num = len(sets)
-
- if axis is None:
- fig, axes = plt.subplots(nrows=num, ncols=1,figsize=size)
- for k in np.arange(0,num):
- ticks = []
- x = []
- ax = axes[k] if axis is None else axis
- ax.set_title(titles[k])
- ax.set_ylim([0, 1.1])
- for key in sets[k].keys():
- s = sets[k][key]
- if s.mf == Membership.trimf:
- ax.plot(s.parameters,[0,1,0])
- elif s.mf == Membership.gaussmf:
- tmpx = [ kk for kk in np.arange(s.lower, s.upper)]
- tmpy = [s.membership(kk) for kk in np.arange(s.lower, s.upper)]
- ax.plot(tmpx, tmpy)
- elif s.mf == Membership.trapmf:
- ax.plot(s.parameters, [0, 1, 1, 0])
- ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
- x.append(s.centroid)
- ax.xaxis.set_ticklabels(ticks)
- ax.xaxis.set_ticks(x)
-
-    if axis is None:
-        plt.tight_layout()
-
-        Util.show_and_save_image(fig, file, save)
-
-
-[docs]def plot_partitioners(data, objs, tam=[12, 10], save=False, file=None, axis=None):
- sets = [k.sets for k in objs]
- titles = [k.name for k in objs]
- plot_sets(data, sets, titles, tam, save, file, axis)
-
-
-[docs]def explore_partitioners(data, npart, methods=None, mf=None, transformation=None,
- size=[12, 10], save=False, file=None):
- """
-    Create partitioners for the given membership functions and number of partitions, and plot the resulting partitionings.
-
-    :param data: Time series data
-    :param npart: Maximum number of partitions of the universe of discourse
-    :param methods: A list with the partitioning methods to be used
-    :param mf: A list with the membership functions to be used
-    :param transformation: A transformation to be applied to the data before partitioning
-    :param size: list, the size of the output image [width, height]
-    :param save: boolean, whether the image will be saved on disk
-    :param file: string, the file path to save the image
-    :return: the list of built partitioners
- """
- if methods is None:
- methods = all_methods
-
- if mf is None:
- mf = mfs
-
- objs = []
-
- for p in methods:
- for m in mf:
- obj = p(data=data, npart=npart, func=m, transformation=transformation)
- obj.name = obj.name + " - " + obj.membership_function.__name__
- objs.append(obj)
-
- plot_partitioners(data, objs, size, save, file)
-
- return objs
-
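A minimal call sketch for explore_partitioners (synthetic data; with the defaults it builds and plots every combination of the four partitioning methods and three membership functions listed above):

    import numpy as np
    from pyFTS.partitioners import Util as pUtil

    data = np.random.normal(20, 5, 500)
    objs = pUtil.explore_partitioners(data, 10)    # returns the twelve built partitioners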
Source code for pyFTS.partitioners.parallel_util
-from copy import deepcopy
-from joblib import Parallel, delayed
-import multiprocessing
-import numpy as np
-
-from pyFTS.common import Membership, Util
-from pyFTS.partitioners import Grid,Huarng,FCM,Entropy
-from pyFTS.partitioners import Util
-
-
-[docs]def explore_partitioners(data, npart, methods=None, mf=None, tam=[12, 10], save=False, file=None):
- all_methods = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
- mfs = [Membership.trimf, Membership.gaussmf, Membership.trapmf]
-
- if methods is None:
- methods = all_methods
-
- if mf is None:
- mf = mfs
-
- num_cores = multiprocessing.cpu_count()
-
- objs = []
- for method in methods:
- print(str(method))
-        tmp = Parallel(n_jobs=num_cores)(delayed(method)(data=deepcopy(data), npart=npart, func=m) for m in mf)
- objs.append(tmp)
-
- objs = np.ravel(objs).tolist()
-
- Util.plot_partitioners(data, objs, tam, save, file)
-
-
Source code for pyFTS.partitioners.partitioner
-from pyFTS.common import FuzzySet, Membership
-import numpy as np
-from scipy.spatial import KDTree
-import matplotlib.pylab as plt
-
-
-[docs]class Partitioner(object):
- """
- Universe of Discourse partitioner. Splits the data into several overlapping fuzzy sets
- """
-
- def __init__(self, **kwargs):
- """
- Universe of Discourse partitioner scheme. Splits the data into several overlapping fuzzy sets
- """
- self.name = kwargs.get('name',"")
- """partitioner name"""
- self.partitions = kwargs.get('npart', 10)
- """The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created"""
- self.sets = {}
- self.membership_function = kwargs.get('func', Membership.trimf)
- """Fuzzy membership function (pyFTS.common.Membership)"""
- self.setnames = kwargs.get('names', None)
- """list of partitions names. If None is given the partitions will be auto named with prefix"""
- self.prefix = kwargs.get('prefix', 'A')
- """prefix of auto generated partition names"""
- self.transformation = kwargs.get('transformation', None)
- """data transformation to be applied on data"""
- self.indexer = kwargs.get('indexer', None)
- self.variable = kwargs.get('variable', None)
- """In a multivariate context, the variable that contains this partitioner"""
- self.type = kwargs.get('type', 'common')
- """The type of fuzzy sets that are generated by this partitioner"""
- self.extractor = kwargs.get('extractor', lambda x: x)
- """Anonymous function used to extract a single primitive type from an object instance"""
- self.ordered_sets = None
- """A ordered list of the fuzzy sets names, sorted by their middle point"""
- self.kdtree = None
- """A spatial index to help in fuzzyfication"""
-
- if kwargs.get('preprocess',True):
-
- data = kwargs.get('data',[None])
-
- if self.indexer is not None:
- ndata = self.indexer.get_data(data)
- else:
- ndata = data
-
- if self.transformation is not None:
- ndata = self.transformation.apply(ndata)
- else:
- ndata = data
-
- if self.indexer is not None:
- ndata = self.indexer.get_data(ndata)
-
- _min = np.nanmin(ndata)
- if _min == -np.inf:
- ndata[ndata == -np.inf] = 0
- _min = np.nanmin(ndata)
-
- self.min = float(_min * 1.1 if _min < 0 else _min * 0.9)
-
- _max = np.nanmax(ndata)
- self.max = float(_max * 1.1 if _max > 0 else _max * 0.9)
-
- self.sets = self.build(ndata)
-
- if self.ordered_sets is None and self.setnames is not None:
- self.ordered_sets = self.setnames[:len(self.sets)]
- else:
- self.ordered_sets = FuzzySet.set_ordered(self.sets)
-
- del(ndata)
-
-[docs] def build(self, data):
- """
- Perform the partitioning of the Universe of Discourse
-
- :param data: training data
- :return:
- """
- pass
-
-[docs] def get_name(self, counter):
- """
- Find the name of the fuzzy set given its counter id.
-
- :param counter: The number of the fuzzy set
- :return: String
- """
- return self.prefix + str(counter) if self.setnames is None else self.setnames[counter]
-
-[docs] def lower_set(self):
- """
- Return the fuzzy set on lower bound of the universe of discourse.
-
- :return: Fuzzy Set
- """
- return self.sets[self.ordered_sets[0]]
-
-[docs] def upper_set(self):
- """
- Return the fuzzy set on upper bound of the universe of discourse.
-
- :return: Fuzzy Set
- """
- return self.sets[self.ordered_sets[-1]]
-
-[docs] def build_index(self):
- points = []
-
- #self.index = {}
-
- for ct, key in enumerate(self.ordered_sets):
- fset = self.sets[key]
- points.append([fset.lower, fset.centroid, fset.upper])
- #self.index[ct] = fset.name
-
- import sys
- sys.setrecursionlimit(100000)
-
- self.kdtree = KDTree(points)
-
- sys.setrecursionlimit(1000)
-
-[docs] def fuzzyfy(self, data, **kwargs):
- """
- Fuzzyfy the input data according to this partitioner fuzzy sets.
-
- :param data: input value to be fuzzyfied
- :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
- :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
- :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
- values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
-
-        :return: a list with the fuzzyfied values, depending on the mode
- """
-
- if isinstance(data, (list, np.ndarray)):
- ret = []
- for inst in data:
- mv = self.fuzzyfy(inst, **kwargs)
- ret.append(mv)
- return ret
-
- alpha_cut = kwargs.get('alpha_cut', 0.)
- mode = kwargs.get('mode', 'sets')
- method = kwargs.get('method', 'fuzzy')
-
- nearest = self.search(data, type='index')
-
- mv = np.zeros(self.partitions)
-
- for ix in nearest:
- tmp = self[ix].membership(data)
- mv[ix] = tmp if tmp >= alpha_cut else 0.
-
- ix = np.ravel(np.argwhere(mv > 0.))
-
- if ix.size == 0:
- mv[self.check_bounds(data)] = 1.
-
- if method == 'fuzzy' and mode == 'vector':
- return mv
- elif method == 'fuzzy' and mode == 'sets':
- ix = np.ravel(np.argwhere(mv > 0.))
- sets = [self.ordered_sets[i] for i in ix]
- return sets
- elif method == 'maximum' and mode == 'sets':
- mx = max(mv)
- ix = np.ravel(np.argwhere(mv == mx))
- return self.ordered_sets[ix[0]]
- elif mode == 'both':
- ix = np.ravel(np.argwhere(mv > 0.))
- sets = [(self.ordered_sets[i], mv[i]) for i in ix]
- return sets
-
-[docs] def check_bounds(self, data):
- '''
- Check if the input data is outside the known Universe of Discourse and, if it is, round it to the closest
- fuzzy set.
-
- :param data: input data to be verified
-        :return: the index of the closest fuzzy set when data is outside the universe of discourse, or None if
-        the data is inside the UoD.
- '''
- if data < self.min:
- return 0
- elif data > self.max:
- return self.partitions-1
-
-[docs] def search(self, data, **kwargs):
- '''
-        Perform a search for the fuzzy sets nearest to the point 'data'. This function was designed to work with several
-        overlapping fuzzy sets.
-
- :param data: the value to search for the nearest fuzzy sets
- :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
- :param results: the number of nearest fuzzy sets to return
- :return: a list with the nearest fuzzy sets
- '''
- if self.kdtree is None:
- self.build_index()
-
- type = kwargs.get('type','index')
- results = kwargs.get('results', 3)
-
- _, ix = self.kdtree.query([data, data, data], results)
-
- if type == 'name':
- return [self.ordered_sets[k] for k in sorted(ix)]
- else:
- return sorted(ix)
-
-
-[docs] def plot(self, ax, rounding=0):
- """
- Plot the partitioning using the Matplotlib axis ax
-
-        :param ax: Matplotlib axis
-        :param rounding: number of decimal places used for the centroid tick labels
- """
- ax.set_title(self.name)
- ax.set_ylim([0, 1.1])
- ax.set_xlim([self.min, self.max])
- ticks = []
- x = []
- for key in self.sets.keys():
- s = self.sets[key]
- if s.type == 'common':
- self.plot_set(ax, s)
- elif s.type == 'composite':
- for ss in s.sets:
- self.plot_set(ax, ss)
- ticks.append(str(round(s.centroid,rounding))+'\n'+s.name)
- x.append(s.centroid)
- ax.xaxis.set_ticklabels(ticks)
- ax.xaxis.set_ticks(x)
-
-[docs] def plot_set(self, ax, s):
- """
- Plot an isolate fuzzy set on Matplotlib axis
-
- :param ax: Matplotlib axis
- :param s: Fuzzy Set
- """
- if s.mf == Membership.trimf:
- ax.plot([s.parameters[0], s.parameters[1], s.parameters[2]], [0, s.alpha, 0])
- elif s.mf in (Membership.gaussmf, Membership.bellmf, Membership.sigmf):
- tmpx = np.linspace(s.lower, s.upper, 100)
- tmpy = [s.membership(kk) for kk in tmpx]
- ax.plot(tmpx, tmpy)
- elif s.mf == Membership.trapmf:
- ax.plot(s.parameters, [0, s.alpha, s.alpha, 0])
- elif s.mf == Membership.singleton:
- ax.plot([s.parameters[0],s.parameters[0]], [0, s.alpha])
-
- def __str__(self):
- """
- Return a string representation of the partitioner, the list of fuzzy sets and their parameters
-
- :return:
- """
- tmp = self.name + ":\n"
- for key in self.sets.keys():
- tmp += str(self.sets[key])+ "\n"
- return tmp
-
- def __len__(self):
- """
- Return the number of partitions
-
- :return: number of partitions
- """
- return self.partitions
-
- def __getitem__(self, item):
- """
- Return a fuzzy set by its order or its name.
-
- :param item: If item is an integer then it represents the fuzzy set index (order), if it was a string then
- it represents the fuzzy set name.
- :return: the fuzzy set
- """
-        if isinstance(item, (int, np.integer)):
- if item < 0 or item >= self.partitions:
- raise ValueError("The fuzzy set index must be between 0 and {}.".format(self.partitions))
- return self.sets[self.ordered_sets[item]]
- elif isinstance(item, str):
- if item not in self.sets:
- raise ValueError("The fuzzy set with name {} does not exist.".format(item))
- return self.sets[item]
- else:
- raise ValueError("The parameter 'item' must be an integer or a string and the value informed was {} of type {}!".format(item, type(item)))
-
- def __iter__(self):
- """
- Iterate over the fuzzy sets, ordered by its midpoints.
-
- :return: An iterator over the fuzzy sets.
- """
- for key in self.ordered_sets:
- yield self.sets[key]
-
-
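The fuzzyfy() modes documented above, side by side (a sketch; the Grid partitioner and the probe value are arbitrary):

    import numpy as np
    from pyFTS.partitioners import Grid

    part = Grid.GridPartitioner(data=np.random.normal(0, 1, 500), npart=7)

    part.fuzzyfy(0.3, method='fuzzy',   mode='sets')      # names of all sets with membership > 0
    part.fuzzyfy(0.3, method='fuzzy',   mode='vector')    # membership vector over every set
    part.fuzzyfy(0.3, method='maximum', mode='sets')      # only the best matching set name
    part.fuzzyfy(0.3, method='fuzzy',   mode='both')      # list of (set name, membership) pairs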
Source code for pyFTS.probabilistic.ProbabilityDistribution
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-from pyFTS.common import FuzzySet,SortedCollection,tree
-from pyFTS.probabilistic import kde
-
-
-[docs]class ProbabilityDistribution(object):
- """
- Represents a discrete or continuous probability distribution
- If type is histogram, the PDF is discrete
- If type is KDE the PDF is continuous
- """
- def __init__(self, type = "KDE", **kwargs):
- self.uod = kwargs.get("uod", None)
- """Universe of discourse"""
-
- self.data = []
-
- self.type = type
- """
- If type is histogram, the PDF is discrete
- If type is KDE the PDF is continuous
- """
-
- self.bins = kwargs.get("bins", None)
- """Number of bins on a discrete PDF"""
- self.labels = kwargs.get("bins_labels", None)
- """Bins labels on a discrete PDF"""
-
- data = kwargs.get("data", None)
-
- if self.type == "KDE":
- self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
- _min = np.nanmin(data)
- _min = _min * .7 if _min > 0 else _min * 1.3
- _max = np.nanmax(data)
- _max = _max * 1.3 if _max > 0 else _max * .7
- self.uod = [_min, _max]
-
- self.nbins = kwargs.get("num_bins", 100)
-
- if self.bins is None:
- self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
- self.labels = [str(k) for k in self.bins]
-
- if self.uod is not None:
- self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
-
- self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
- self.quantile_index = None
- self.distribution = {}
- self.cdf = None
- self.qtl = None
- self.count = 0
- for k in self.bins: self.distribution[k] = 0
-
- if data is not None:
- self.append(data)
-
- self.name = kwargs.get("name", "")
-
-[docs] def set(self, value, density):
- """
-        Assign a probability 'density' to a certain value 'value', such that P(value) = density
-
- :param value: A value in the universe of discourse from the distribution
- :param density: The probability density to assign to the value
- """
- k = self.bin_index.find_ge(value)
- self.distribution[k] = density
-
-[docs] def append(self, values):
- """
- Increment the frequency count for the values
-
- :param values: A list of values to account the frequency
- """
- if self.type == "histogram":
- for k in values:
- v = self.bin_index.find_ge(k)
- self.distribution[v] += 1
- self.count += 1
- else:
- self.data.extend(values)
- self.distribution = {}
- dens = self.density(self.bins)
- for v,d in enumerate(dens):
- self.distribution[self.bins[v]] = d
-
-[docs] def append_interval(self, intervals):
- """
- Increment the frequency count for all values inside an interval
-
-        :param intervals: A list of intervals to increment the frequency of
- """
- if self.type == "histogram":
- for interval in intervals:
- for k in self.bin_index.inside(interval[0], interval[1]):
- self.distribution[k] += 1
- self.count += 1
-
-[docs] def density(self, values):
- """
- Return the probability densities for the input values
-
- :param values: List of values to return the densities
- :return: List of probability densities for the input values
- """
- ret = []
- scalar = False
-
- if not isinstance(values, list):
- values = [values]
- scalar = True
-
- for k in values:
- if self.type == "histogram":
- v = self.bin_index.find_ge(k)
- ret.append(self.distribution[v] / (self.count + 1e-5))
- elif self.type == "KDE":
- v = self.kde.probability(k, self.data)
- ret.append(v)
- else:
- v = self.bin_index.find_ge(k)
- ret.append(self.distribution[v])
-
- if scalar:
- return ret[0]
-
- return ret
-
-[docs] def differential_offset(self, value):
- """
- Auxiliary function for probability distributions of differentiated data
-
- :param value:
- :return:
- """
- nbins = []
- dist = {}
-
- for k in self.bins:
- nk = k+value
- nbins.append(nk)
- dist[nk] = self.distribution[k]
-
- self.bins = nbins
- self.distribution = dist
- self.labels = [str(k) for k in self.bins]
-
- self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
- self.quantile_index = None
- self.cdf = None
- self.qtl = None
-
-[docs] def expected_value(self):
- """
- Return the expected value of the distribution, as E[X] = ∑ x * P(x)
-
- :return: The expected value of the distribution
- """
- return np.nansum([v * self.distribution[v] for v in self.bins])
-
-[docs] def build_cdf_qtl(self):
- ret = 0.0
- self.cdf = {}
- self.qtl = {}
- for k in sorted(self.bins):
- ret += self.density(k)
- if k not in self.cdf:
- self.cdf[k] = ret
-
- if str(ret) not in self.qtl:
- self.qtl[str(ret)] = []
-
- self.qtl[str(ret)].append(k)
-
- _keys = [float(k) for k in sorted(self.qtl.keys())]
-
- self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)
-
-[docs] def cumulative(self, values):
- """
- Return the cumulative probability densities for the input values,
- such that F(x) = P(X <= x)
-
- :param values: A list of input values
- :return: The cumulative probability densities for the input values
- """
- if self.cdf is None:
- self.build_cdf_qtl()
-
-        if isinstance(values, list):
-            ret = []
-            for val in values:
-                k = self.bin_index.find_ge(val)
-                ret.append(self.cdf[k])
-            return ret
-        else:
-            k = self.bin_index.find_ge(values)
-            return self.cdf[k]
-
-[docs] def quantile(self, values):
- """
- Return the Universe of Discourse values in relation to the quantile input values,
- such that Q(tau) = min( {x | F(x) >= tau })
-
- :param values: input values
- :return: The list of the quantile values for the input values
- """
- if self.qtl is None:
- self.build_cdf_qtl()
-
- if isinstance(values, list):
- ret = []
- for val in values:
- k = self.quantile_index.find_ge(val)
- ret.append(self.qtl[str(k)][0])
- else:
- k = self.quantile_index.find_ge(values)
- ret = self.qtl[str(k)]
-
- return ret
-
-[docs] def entropy(self):
- """
- Return the entropy of the probability distribution, H(P) = E[ -ln P(X) ] = - ∑ P(x) log ( P(x) )
-
- :return:the entropy of the probability distribution
- """
- h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
- for k in self.bins])
- return h
-
-[docs] def crossentropy(self,q):
- """
- Cross entropy between the actual probability distribution and the informed one,
- H(P,Q) = - ∑ P(x) log ( Q(x) )
-
- :param q: a probabilistic.ProbabilityDistribution object
- :return: Cross entropy between this probability distribution and the given distribution
- """
- h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
- for k in self.bins])
- return h
-
-[docs] def kullbackleiblerdivergence(self,q):
- """
- Kullback-Leibler divergence between the actual probability distribution and the informed one.
- DKL(P || Q) = - ∑ P(x) log( P(X) / Q(x) )
-
- :param q: a probabilistic.ProbabilityDistribution object
- :return: Kullback-Leibler divergence
- """
- h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
- for k in self.bins])
- return h
-
-[docs] def empiricalloglikelihood(self):
- """
- Empirical Log Likelihood of the probability distribution, L(P) = ∑ log( P(x) )
-
- :return:
- """
- _s = 0
- for k in self.bins:
- if self.distribution[k] > 0:
- _s += np.log(self.distribution[k])
- return _s
-
-[docs] def pseudologlikelihood(self, data):
- """
- Pseudo log likelihood of the probability distribution with respect to data
-
- :param data:
- :return:
- """
-
- densities = self.density(data)
-
- _s = 0
- for k in densities:
- if k > 0:
- _s += np.log(k)
- return _s
-
-[docs] def averageloglikelihood(self, data):
- """
- Average log likelihood of the probability distribution with respect to data
-
- :param data:
- :return:
- """
-
- densities = self.density(data)
-
- _s = 0
- for k in densities:
- if k > 0:
- _s += np.log(k)
- return _s / len(data)
-
-[docs] def plot(self,axis=None,color="black",tam=[10, 6], title = None):
-
- if axis is None:
- fig = plt.figure(figsize=tam)
- axis = fig.add_subplot(111)
-
- if self.type == "histogram":
- ys = [self.distribution[k]/self.count for k in self.bins]
- else:
- ys = [self.distribution[k] for k in self.bins]
- yp = [0 for k in self.data]
- axis.plot(self.data, yp, c="red")
-
- if title is None:
- title = self.name
- axis.plot(self.bins, ys, c=color)
- axis.set_title(title)
-
- axis.set_xlabel('Universe of Discourse')
- axis.set_ylabel('Probability')
-
- def __str__(self):
- ret = ""
- for k in sorted(self.bins):
- ret += str(round(k,2)) + ':\t'
- if self.type == "histogram":
- ret += str(round(self.distribution[k] / self.count,3))
- elif self.type == "KDE":
- ret += str(round(self.density(k),3))
- else:
- ret += str(round(self.distribution[k], 6))
- ret += '\n'
- return ret
-
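A minimal usage sketch of the histogram mode (synthetic data; the bin grid is illustrative):

    import numpy as np
    from pyFTS.probabilistic import ProbabilityDistribution

    data = np.random.normal(10, 2, 1000).tolist()
    bins = np.linspace(0, 25, 50).tolist()
    dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", bins=bins, data=data)

    dist.density(10.0)            # relative frequency of the bin containing 10.0
    dist.cumulative(12.0)         # F(12) = P(X <= 12)
    dist.quantile([0.05, 0.95])   # approximate bounds of a 90% interval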
Source code for pyFTS.probabilistic.kde
-# -*- coding: utf8 -*-
-
-"""
-Kernel Density Estimation
-"""
-
-from pyFTS.common import Transformations
-import numpy as np
-
-
-[docs]class KernelSmoothing(object):
- """Kernel Density Estimation"""
- def __init__(self,h, kernel="epanechnikov"):
- self.h = h
- """Width parameter"""
- self.kernel = kernel
- """Kernel function"""
- self.transf = Transformations.Scale(min=0,max=1)
-
-[docs] def kernel_function(self, u):
- """
- Apply the kernel
-
- :param u:
- :return:
- """
- if self.kernel == "epanechnikov":
- tmp = (3/4)*(1.0 - u**2)
- return tmp if tmp > 0 else 0
- elif self.kernel == "gaussian":
- return (1.0/np.sqrt(2*np.pi))*np.exp(-0.5*u**2)
- elif self.kernel == "uniform":
- return 0.5
- elif self.kernel == "triangular":
- tmp = 1.0 - np.abs(u)
- return tmp if tmp > 0 else 0
- elif self.kernel == "logistic":
- return 1.0/(np.exp(u)+2+np.exp(-u))
- elif self.kernel == "cosine":
- return (np.pi/4.0)*np.cos((np.pi/2.0)*u)
- elif self.kernel == "sigmoid":
- return (2.0/np.pi)*(1.0/(np.exp(u)+np.exp(-u)))
- elif self.kernel == "tophat":
- return 1 if np.abs(u) < 0.5 else 0
- elif self.kernel == "exponential":
- return 0.5 * np.exp(-np.abs(u))
-
-[docs] def probability(self, x, data):
- """
-        Kernel density estimate of the point x, given the data sample
-
-        :param x: point at which the density is estimated
-        :param data: sample used for the estimation
-        :return: the estimated probability density at x
- """
- l = len(data)
-
- ndata = self.transf.apply(data)
- nx = self.transf.apply(x)
-        p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / (l * self.h)
-
- return p
-
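A minimal sketch of the kernel smoother above (the sample is synthetic; note that probability() rescales the data to [0, 1] internally through the Scale transformation):

    import numpy as np
    from pyFTS.probabilistic import kde

    sample = np.random.normal(0, 1, 200).tolist()
    ks = kde.KernelSmoothing(0.2, kernel="gaussian")
    print(ks.probability(0.0, sample))   # smoothed density estimate at x = 0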