2017-02-05 02:40:27 +04:00
|
|
|
import numpy as np
|
2017-07-04 19:18:07 +04:00
|
|
|
import pandas as pd
|
2017-02-08 19:23:41 +04:00
|
|
|
from enum import Enum
|
2017-02-05 02:40:27 +04:00
|
|
|
|
2017-07-02 02:42:45 +04:00
|
|
|
|
2017-02-05 02:40:27 +04:00
|
|
|
class SeasonalIndexer(object):
    """
    Base class for seasonal indexers.

    A seasonal indexer is responsible for finding the seasonal index of a
    data point inside its data set. This class only stores the common
    attributes; every operation below is a stub that returns ``None`` and is
    meant to be overridden by subclasses.
    """

    def __init__(self, num_seasons, **kwargs):
        """
        :param num_seasons: number of seasonalities handled by the indexer
        :param kwargs: optional settings; only ``name`` is read here and it
            defaults to an empty string
        """
        self.num_seasons = num_seasons
        self.name = kwargs.get("name", "")

    def get_season_of_data(self, data):
        """Return the season of every point in ``data`` (stub: returns None)."""
        pass

    def get_season_by_index(self, inde):
        """Return the season for the given index (stub: returns None)."""
        # NOTE(review): ``inde`` looks like a typo for ``index``; the name is
        # kept so keyword callers are not broken.
        pass

    def get_data_by_season(self, data, indexes):
        """Return the part of ``data`` selected by ``indexes`` (stub: returns None)."""
        pass

    def get_index_by_season(self, indexes):
        """Return the index that corresponds to ``indexes`` (stub: returns None)."""
        pass

    def get_data(self, data):
        """Return the values held by ``data`` (stub: returns None)."""
        pass
|
|
|
|
|
|
|
|
|
|
|
|
class LinearSeasonalIndexer(SeasonalIndexer):
    """
    Seasonal indexer for data addressed by integer position.

    For a position ``ix`` the season along seasonality ``k`` is computed as
    ``(ix // units[k]) % seasons[k]``.
    """

    def __init__(self, seasons, units, ignore=None, **kwargs):
        """
        :param seasons: sequence with the modulus of each seasonality; its
            length defines ``num_seasons``
        :param units: sequence parallel to ``seasons`` with the divisor
            applied to a position before the modulus is taken
        :param ignore: optional sequence parallel to ``seasons``; a truthy
            entry leaves that seasonality out of the result. It is only
            consulted when there is more than one seasonality. ``None``
            (the default) keeps every seasonality.
        :param kwargs: forwarded to ``SeasonalIndexer.__init__``
        """
        super(LinearSeasonalIndexer, self).__init__(len(seasons), **kwargs)
        self.seasons = seasons
        self.units = units
        self.ignore = ignore

    def _season_of(self, ix):
        """Return the season of the single position ``ix``.

        With one seasonality the result is a bare value; otherwise it is a
        list with one entry per seasonality that is not ignored.
        """
        if self.num_seasons == 1:
            return (ix // self.units[0]) % self.seasons[0]

        season = []
        for ct, modulus in enumerate(self.seasons):
            tmp = (ix // self.units[ct]) % modulus
            # ``ignore`` defaults to None; treat that as "ignore nothing"
            # instead of failing on ``None[ct]``.
            if self.ignore is None or not self.ignore[ct]:
                season.append(tmp)
        return season

    def get_season_of_data(self, data):
        """Return the season of every position ``0 .. len(data) - 1``."""
        return self.get_season_by_index(np.arange(0, len(data)).tolist())

    def get_season_by_index(self, index):
        """
        Return the seasons of one or several positions.

        :param index: a single position, or a ``list`` / ``numpy.ndarray``
            of positions
        :return: a list with one season per position; a single position
            still yields a one-element list
        """
        if isinstance(index, (list, np.ndarray)):
            return [self._season_of(ix) for ix in index]
        return [self._season_of(index)]

    def get_index_by_season(self, indexes):
        """
        Combine per-seasonality values into a single number.

        :param indexes: sequence with one value per entry of ``seasons``
        :return: the sum of ``seasons[k] * indexes[k]`` over all seasonalities
        """
        ix = 0
        for count, season in enumerate(self.seasons):
            ix += season * indexes[count]
        return ix

    def get_data(self, data):
        """Return ``data`` unchanged."""
        return data
|
|
|
|
|
|
|
|
|
|
|
|
class DataFrameSeasonalIndexer(SeasonalIndexer):
    """
    Seasonal indexer that reads the season from columns of a data frame.
    """

    def __init__(self, index_fields, index_seasons, data_fields, **kwargs):
        """
        :param index_fields: column names that hold the seasonal information
        :param index_seasons: sequence parallel to ``index_fields``; ``None``
            uses the column value as is, any other value is used as a
            floor-division divisor for the column value
        :param data_fields: column name (or list of column names) that holds
            the data values
        :param kwargs: forwarded to ``SeasonalIndexer.__init__``
        """
        super(DataFrameSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
        self.fields = index_fields
        self.seasons = index_seasons
        self.data_fields = data_fields

    def get_season_of_data(self, data):
        """
        Return the season of every row of ``data``.

        :param data: data frame containing every column in ``index_fields``
        :return: a list with one entry per label of ``data.index``; each
            entry is a list with one value per index field
        """
        ret = []
        for ix in data.index:
            season = []
            for c, f in enumerate(self.fields):
                value = data[f][ix]
                if self.seasons[c] is not None:
                    value = value // self.seasons[c]
                season.append(value)
            ret.append(season)
        return ret

    def get_season_by_index(self, index):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")

    def get_data_by_season(self, data, indexes):
        """
        Filter ``data`` by season and return its data columns.

        :param data: data frame containing the index and data columns
        :param indexes: sequence of seasons, each one a sequence with one
            value per index field
        :return: ``data[data_fields]`` restricted to the matching rows
        """
        # NOTE(review): the filters are applied cumulatively, so a row must
        # match EVERY season in ``indexes``; passing two different seasons
        # yields an empty result. Confirm whether a union was intended.
        for season in indexes:
            for c, f in enumerate(self.fields):
                if self.seasons[c] is None:
                    data = data[data[f] == season[c]]
                else:
                    data = data[(data[f] // self.seasons[c]) == season[c]]
        return data[self.data_fields]

    def get_index_by_season(self, indexes):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")

    def get_data(self, data):
        """
        Return the data columns of ``data`` as a plain list.

        :return: a flat list when ``data_fields`` selects a single column,
            a list of rows when it selects several columns
        """
        selected = data[self.data_fields]
        if isinstance(selected, pd.DataFrame):
            # A list of column names selects a DataFrame, which has no
            # ``tolist``; go through the underlying array instead.
            return selected.values.tolist()
        return selected.tolist()

    def set_data(self, data, value):
        """Overwrite the data columns of ``data`` in place and return it."""
        data.loc[:, self.data_fields] = value
        return data
|
|
|
|
|
2017-07-02 02:42:45 +04:00
|
|
|
|
2017-02-08 19:23:41 +04:00
|
|
|
class DateTime(Enum):
    """Components of a date/time value that can be used as a seasonality."""

    year = 1
    month = 2
    day_of_month = 3
    day_of_year = 4
    day_of_week = 5
    hour = 6
    minute = 7
    second = 8
|
|
|
|
|
|
|
|
|
|
|
|
class DateTimeSeasonalIndexer(SeasonalIndexer):
    """
    Seasonal indexer that derives the season from a date/time column.
    """

    def __init__(self, date_field, index_fields, index_seasons, data_fields, **kwargs):
        """
        :param date_field: name of the column that holds the date/time value
        :param index_fields: sequence of ``DateTime`` members, one per
            seasonality
        :param index_seasons: sequence parallel to ``index_fields`` with the
            resolution of each seasonality; ``None`` keeps the raw component,
            any other value is used as a floor-division divisor
        :param data_fields: column name (or list of column names) that holds
            the data values
        :param kwargs: forwarded to ``SeasonalIndexer.__init__``
        """
        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons), **kwargs)
        self.fields = index_fields
        self.seasons = index_seasons
        self.data_fields = data_fields
        self.date_field = date_field

    def strip_datepart(self, date, date_part, resolution):
        """
        Extract one component of ``date``.

        :param date: object exposing the ``datetime`` attributes and methods
            used below (``year``, ``month``, ``day``, ``hour``, ``minute``,
            ``second``, ``weekday()``, ``timetuple()``)
        :param date_part: the ``DateTime`` member to extract
        :param resolution: ``None`` to return the component unchanged,
            otherwise the component is floor-divided by this value
        :return: the (optionally scaled) component
        :raises ValueError: if ``date_part`` is not a known ``DateTime`` member
        """
        if date_part == DateTime.year:
            tmp = date.year
        elif date_part == DateTime.month:
            tmp = date.month
        elif date_part == DateTime.day_of_year:
            tmp = date.timetuple().tm_yday
        elif date_part == DateTime.day_of_month:
            tmp = date.day
        elif date_part == DateTime.day_of_week:
            tmp = date.weekday()
        elif date_part == DateTime.hour:
            tmp = date.hour
        elif date_part == DateTime.minute:
            tmp = date.minute
        elif date_part == DateTime.second:
            tmp = date.second
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # ``tmp`` further down.
            raise ValueError("Unknown date part: {0!r}".format(date_part))

        if resolution is None:
            return tmp
        return tmp // resolution

    def _season_of_date(self, date):
        """Return the list of seasonal components of a single ``date``."""
        return [self.strip_datepart(date, f, self.seasons[c])
                for c, f in enumerate(self.fields)]

    def get_season_of_data(self, data):
        """
        Return the season of every record in ``data``.

        :param data: a ``pandas.DataFrame`` (one season per index label) or a
            ``pandas.Series`` holding a single record
        :return: a list of seasons, each one a list with one value per index
            field; any other input type yields an empty list
        """
        ret = []

        if isinstance(data, pd.DataFrame):
            for ix in data.index:
                ret.append(self._season_of_date(data[self.date_field][ix]))
        elif isinstance(data, pd.Series):
            ret.append(self._season_of_date(data[self.date_field]))

        return ret

    def get_season_by_index(self, index):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")

    def get_data_by_season(self, data, indexes):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")

    def get_index_by_season(self, indexes):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")

    def get_data(self, data):
        """
        Return the data columns of ``data`` as a plain list.

        :return: a flat list when ``data_fields`` selects a single column,
            a list of rows when it selects several columns
        """
        selected = data[self.data_fields]
        if isinstance(selected, pd.DataFrame):
            # A list of column names selects a DataFrame, which has no
            # ``tolist``; go through the underlying array instead.
            return selected.values.tolist()
        return selected.tolist()

    def set_data(self, data, value):
        """Not supported by this indexer."""
        raise NotImplementedError("Operation not available!")
|