Return ndarray instead of DataFrame in df_loader.py

This commit is contained in:
Aleksey Filippov 2023-06-06 17:59:37 +04:00
parent ae87945f46
commit f4a32bf57f

View File

@@ -2,6 +2,7 @@ from datetime import date
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from numpy import ndarray
from pandas import DataFrame from pandas import DataFrame
from src.main.constants import Constants as const from src.main.constants import Constants as const
@@ -73,8 +74,8 @@ class DfLoader:
self.__df['location-lo'] = self.__df.loc[:, 'location'] \ self.__df['location-lo'] = self.__df.loc[:, 'location'] \
.apply(lambda val: 0 if Utils.is_empty_collection(val) else val[1]) .apply(lambda val: 0 if Utils.is_empty_collection(val) else val[1])
def get_clustering_data(self) -> DataFrame: def get_data(self) -> ndarray:
columns: [] = ['location-la', 'location-lo', columns: [] = ['location-la', 'location-lo',
'sex', 'age', 'is_university', 'is_work', 'is_student', 'is_schoolboy'] 'sex', 'age', 'is_university', 'is_work', 'is_student', 'is_schoolboy']
df = self.__df df = self.__df
return df[columns] return df[columns].to_numpy()