2016-12-26 21:47:23 +04:00
import numpy as np
import math
import random as rnd
import functools , operator
from pyFTS . common import FuzzySet , Membership
2017-02-24 20:29:55 +04:00
from pyFTS . partitioners import partitioner
2016-12-26 21:47:23 +04:00
# C. H. Cheng, R. J. Chang, and C. A. Yeh, “Entropy-based and trapezoidal fuzzification-based fuzzy time series approach for forecasting IT project cost,”
# Technol. Forecast. Social Change, vol. 73, no. 5, pp. 524–542, Jun. 2006.
2016-12-27 00:25:59 +04:00
2016-12-26 21:47:23 +04:00
def splitBelow(data, threshold):
    """Return the items of ``data`` that are less than or equal to ``threshold``.

    The original order of the items is preserved and a new list is returned.
    """
    return list(filter(lambda value: value <= threshold, data))
2016-12-27 00:25:59 +04:00
2016-12-26 21:47:23 +04:00
def splitAbove(data, threshold):
    """Return the items of ``data`` that are strictly greater than ``threshold``.

    The original order of the items is preserved and a new list is returned.
    """
    return list(filter(lambda value: value > threshold, data))
2016-12-27 00:25:59 +04:00
2016-12-26 21:47:23 +04:00
def PMF(data, threshold):
    """Return the empirical two-point distribution of ``data`` around ``threshold``.

    The result is ``[p_below, p_above]``: the fraction of items that are
    ``<= threshold`` followed by the fraction that are ``> threshold``.

    Raises ZeroDivisionError when ``data`` is empty, because the counts are
    divided by ``len(data)``.
    """
    total = len(data)
    # Counts are accumulated as floats so the ratios below are real-valued.
    below = sum(1.0 for _ in splitBelow(data, threshold))
    above = sum(1.0 for _ in splitAbove(data, threshold))
    return [below / total, above / total]
def entropy(data, threshold):
    """Return the entropy (natural log) of splitting ``data`` at ``threshold``.

    The two probabilities come from ``PMF``. When either side of the split is
    empty the function returns the constant ``1`` instead of evaluating
    ``log(0)``.
    """
    p_below, p_above = PMF(data, threshold)

    # Degenerate split: one side holds every item.
    # NOTE(review): 1 exceeds the largest two-class entropy in nats (ln 2), so
    # this looks like a penalty value rather than a true entropy - confirm.
    if p_below == 0 or p_above == 0:
        return 1

    return -(p_below * math.log(p_below) + p_above * math.log(p_above))
2016-12-26 21:47:23 +04:00
def informationGain(data, thres1, thres2):
    """Return ``entropy(data, thres1) - entropy(data, thres2)``.

    A positive value means the split at ``thres2`` has lower entropy than the
    split at ``thres1``.
    """
    entropy_first = entropy(data, thres1)
    entropy_second = entropy(data, thres2)
    return entropy_first - entropy_second
def bestSplit(data, npart):
    """Recursively choose split thresholds for ``data``.

    The distinct values of ``data`` are scanned in ascending order; the scan
    advances while ``informationGain`` between consecutive candidates is not
    positive, and the last candidate reached becomes the threshold. If the
    partition budget allows, ``data`` is divided at that threshold and each
    half is split again with roughly half of the budget (the odd remainder
    goes to the larger half).

    Parameters:
        data: a sized collection of hashable, mutually comparable values.
        npart: the number of partitions requested.

    Returns:
        A list of thresholds (not sorted and not de-duplicated), or ``None``
        when ``data`` has fewer than two items.
    """
    if len(data) < 2:
        return None

    ndata = sorted(set(data))
    l = len(ndata)

    # Start from the smallest observed value so the threshold always comes
    # from the data; an arbitrary 0 could fall outside the data range and
    # produce an empty lower partition.
    threshold = ndata[0]
    count = 1
    # ``count < l`` guards every index used here, so no IndexError handling
    # is required around the scan.
    while count < l and informationGain(data, ndata[count - 1], ndata[count]) <= 0:
        threshold = ndata[count]
        count += 1

    rem = npart % 2
    # ``npart - rem`` is even, so floor division is exact and keeps integer
    # budgets as integers on Python 3.
    half = (npart - rem) // 2
    if half <= 1:
        return [threshold]

    p1 = splitBelow(data, threshold)
    p2 = splitAbove(data, threshold)

    # The larger side receives the extra partition when ``npart`` is odd.
    if len(p1) > len(p2):
        np1, np2 = half + rem, half
    else:
        np1, np2 = half, half + rem

    tmp = [threshold]
    # A side with fewer than two items yields None; treat that as "no further
    # thresholds" instead of failing while iterating over None.
    tmp.extend(bestSplit(p1, np1) or [])
    tmp.extend(bestSplit(p2, np2) or [])
    return tmp
2017-02-24 20:29:55 +04:00
class EntropyPartitioner(partitioner.Partitioner):
    """Partitioner whose fuzzy-set boundaries come from ``bestSplit``."""

    def __init__(self, data, npart, func=Membership.trimf):
        """Forward the partitioner name, data, partition count and membership
        function to the base ``Partitioner`` constructor."""
        # NOTE(review): the name literal is kept exactly as found, including
        # its padding spaces - confirm that the padding is intended.
        super(EntropyPartitioner, self).__init__(" Entropy ", data, npart, func)

    def build(self, data):
        """Build the triangular fuzzy sets for ``data``.

        The data range is widened by 10% of the magnitude of each bound, the
        thresholds returned by ``bestSplit`` are merged with the widened
        bounds, and one triangular set is created for every interior point,
        using its neighbours as the triangle's feet and the point itself as
        the centre.

        Returns a list of ``FuzzySet.FuzzySet`` objects named
        ``self.prefix + str(i)`` for ``i`` starting at 1.
        """
        # Use the magnitude of each bound so the margin always widens the
        # range, including when the bound is negative.
        dmax = max(data)
        dmax += abs(dmax) * 0.10
        dmin = min(data)
        dmin -= abs(dmin) * 0.10

        # bestSplit returns None for fewer than two items; fall back to no
        # interior thresholds in that case.
        # NOTE(review): self.partitions is presumably set by the base class
        # from ``npart`` - verify against Partitioner.
        partitions = list(bestSplit(data, self.partitions) or [])
        partitions.append(dmin)
        partitions.append(dmax)
        partitions = sorted(set(partitions))

        # NOTE(review): the sets are always built with Membership.trimf; the
        # ``func`` constructor argument is only passed to the base class.
        sets = []
        triples = zip(partitions, partitions[1:], partitions[2:])
        for index, (lower, centre, upper) in enumerate(triples, start=1):
            sets.append(FuzzySet.FuzzySet(self.prefix + str(index), Membership.trimf,
                                          [lower, centre, upper], centre))
        return sets