Export main dataframe as numpy array for clustering

This commit is contained in:
Aleksey Filippov 2023-06-05 18:32:51 +04:00
parent 488857052d
commit 2dd770d578

View File

@ -2,6 +2,7 @@ from datetime import date
import numpy as np
import pandas as pd
from numpy import ndarray
from pandas import DataFrame
from src.main.constants import Constants as const
@ -10,6 +11,7 @@ from src.main.utils import Utils
class DfLoader:
def __init__(self, json_file: str) -> None:
self.__geocache: Geocache = Geocache()
print(f'Try to load data from the {json_file} file')
@ -68,5 +70,6 @@ class DfLoader:
self.__df['location'] = self.__df['city'] \
.apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val))
def get_clustering_data(self) -> DataFrame:
    """Return the dataframe held by this loader, without copying it."""
    frame: DataFrame = self.__df
    return frame
def get_clustering_data(self) -> ndarray:
    """Return the clustering feature columns of the main dataframe as an array.

    Selects 'location', 'sex', 'age', 'is_university', 'is_work',
    'is_student' and 'is_schoolboy' (in that order) and converts the
    selection with ``DataFrame.to_numpy()``: one row per dataframe row and
    one column per selected feature.

    Raises:
        KeyError: propagated from pandas if a selected column is missing.
    """
    # ``list[str]`` replaces the original ``[]`` annotation, which is a list
    # literal rather than a type.
    columns: list[str] = [
        'location',
        'sex',
        'age',
        'is_university',
        'is_work',
        'is_student',
        'is_schoolboy',
    ]
    return self.__df[columns].to_numpy()