Export main dataframe as numpy array for clustering
This commit is contained in:
parent
488857052d
commit
2dd770d578
@ -2,6 +2,7 @@ from datetime import date
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from numpy import ndarray
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
from src.main.constants import Constants as const
|
from src.main.constants import Constants as const
|
||||||
@ -10,6 +11,7 @@ from src.main.utils import Utils
|
|||||||
|
|
||||||
|
|
||||||
class DfLoader:
|
class DfLoader:
|
||||||
|
|
||||||
def __init__(self, json_file: str) -> None:
|
def __init__(self, json_file: str) -> None:
|
||||||
self.__geocache: Geocache = Geocache()
|
self.__geocache: Geocache = Geocache()
|
||||||
print(f'Try to load data from the {json_file} file')
|
print(f'Try to load data from the {json_file} file')
|
||||||
@ -68,5 +70,6 @@ class DfLoader:
|
|||||||
self.__df['location'] = self.__df['city'] \
|
self.__df['location'] = self.__df['city'] \
|
||||||
.apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val))
|
.apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val))
|
||||||
|
|
||||||
def get_clustering_data(self) -> DataFrame:
|
def get_clustering_data(self) -> ndarray:
|
||||||
return self.__df
|
columns: [] = ['location', 'sex', 'age', 'is_university', 'is_work', 'is_student', 'is_schoolboy']
|
||||||
|
return self.__df[columns].to_numpy()
|
||||||
|
Loading…
Reference in New Issue
Block a user