Export main dataframe as numpy array for clustering
This commit is contained in:
parent
488857052d
commit
2dd770d578
@ -2,6 +2,7 @@ from datetime import date
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from numpy import ndarray
|
||||
from pandas import DataFrame
|
||||
|
||||
from src.main.constants import Constants as const
|
||||
@ -10,6 +11,7 @@ from src.main.utils import Utils
|
||||
|
||||
|
||||
class DfLoader:
|
||||
|
||||
def __init__(self, json_file: str) -> None:
|
||||
self.__geocache: Geocache = Geocache()
|
||||
print(f'Try to load data from the {json_file} file')
|
||||
@ -68,5 +70,6 @@ class DfLoader:
|
||||
self.__df['location'] = self.__df['city'] \
|
||||
.apply(lambda val: '' if Utils.is_empty_str(val) else self.__geocache.get_location(val))
|
||||
|
||||
def get_clustering_data(self) -> DataFrame:
    """Return the dataframe held by this loader.

    The very same object stored on the instance is handed back; no copy
    is made.
    """
    frame = self.__df
    return frame
|
||||
def get_clustering_data(self) -> ndarray:
    """Return the clustering feature columns of the dataframe as an array.

    Only the columns listed below are selected from the loader's dataframe,
    and the resulting array keeps them in exactly this order.

    Returns:
        A two-dimensional array with one row per dataframe row and one
        column per selected feature.

    Raises:
        KeyError: Raised by pandas if any of the listed columns is absent
            from the dataframe.
    """
    # The order of this list defines the column order of the exported array.
    columns: list[str] = [
        'location',
        'sex',
        'age',
        'is_university',
        'is_work',
        'is_student',
        'is_schoolboy',
    ]
    return self.__df[columns].to_numpy()
|
||||
|
Loading…
Reference in New Issue
Block a user