Export main dataframe as numpy array for clustering

master
Aleksey Filippov 1 year ago
parent 488857052d
commit 2dd770d578

@ -2,6 +2,7 @@ from datetime import date
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from numpy import ndarray
from pandas import DataFrame from pandas import DataFrame
from src.main.constants import Constants as const from src.main.constants import Constants as const
@ -10,6 +11,7 @@ from src.main.utils import Utils
class DfLoader: class DfLoader:
def __init__(self, json_file: str) -> None: def __init__(self, json_file: str) -> None:
self.__geocache: Geocache = Geocache() self.__geocache: Geocache = Geocache()
print(f'Try to load data from the {json_file} file') print(f'Try to load data from the {json_file} file')
@ -68,5 +70,6 @@ class DfLoader:
self.__df['location'] = self.__df['city'] \ self.__df['location'] = self.__df['city'] \
.apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val)) .apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val))
def get_clustering_data(self) -> DataFrame: def get_clustering_data(self) -> ndarray:
return self.__df columns: [] = ['location', 'sex', 'age', 'is_university', 'is_work', 'is_student', 'is_schoolboy']
return self.__df[columns].to_numpy()

Loading…
Cancel
Save