Improve models for prepare_dataset script to minimize null data
This commit is contained in:
parent
3a51a23048
commit
86fbf158f0
@ -4,11 +4,11 @@ from src.raw_data import RawData
|
|||||||
class Career:
|
class Career:
|
||||||
def __init__(self, raw_career):
|
def __init__(self, raw_career):
|
||||||
data = RawData(raw_career)
|
data = RawData(raw_career)
|
||||||
self.id = data.id
|
self.id = data.get_int('id')
|
||||||
self.group_id = data.group_id
|
self.group_id = data.get_int('group_id')
|
||||||
self.company = data.company
|
self.company = data.get_str('company')
|
||||||
self.country_id = data.country_id
|
self.country_id = data.get_int('country_id')
|
||||||
self.city_id = data.city_id
|
self.city_id = data.get_int('city_id')
|
||||||
self.year_from = raw_career['from']
|
self.year_from = data.get_int('from')
|
||||||
self.until = data.until
|
self.until = data.get_int('until')
|
||||||
self.position = data.position
|
self.position = data.get_str('position')
|
||||||
|
@ -4,9 +4,9 @@ from src.raw_data import RawData
|
|||||||
class Military:
|
class Military:
|
||||||
def __init__(self, raw_military):
|
def __init__(self, raw_military):
|
||||||
data = RawData(raw_military)
|
data = RawData(raw_military)
|
||||||
self.id = data.id
|
self.id = data.get_int('id')
|
||||||
self.unit_id = data.unit_id
|
self.unit_id = data.get_int('unit_id')
|
||||||
self.unit = data.unit
|
self.unit = data.get_str('unit')
|
||||||
self.country_id = data.country_id
|
self.country_id = data.get_int('country_id')
|
||||||
self.year_from = raw_military['from']
|
self.year_from = data.get_int('from')
|
||||||
self.until = data.until
|
self.until = data.get_int('until')
|
||||||
|
@ -13,51 +13,46 @@ class Person:
|
|||||||
self.deactivated = data.deactivated is not None
|
self.deactivated = data.deactivated is not None
|
||||||
self.has_photo = data.has_photo == 1
|
self.has_photo = data.has_photo == 1
|
||||||
self.followers_count = data.followers_count
|
self.followers_count = data.followers_count
|
||||||
self.sex = data.sex
|
self.sex = data.get_int('sex')
|
||||||
self.bdate = data.get_date(data.bdate)
|
self.bdate = RawData.get_date_st(data.bdate)
|
||||||
self.relation = data.relation
|
self.relation = data.get_int('relation')
|
||||||
self.country_id = data.get_int(data.country, 'id')
|
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||||
self.country = data.get_int(data.country, 'title')
|
self.country = RawData.get_str_st(data.country, 'title')
|
||||||
self.city_id = data.get_int(data.city, 'id')
|
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||||
self.city = data.get_int(data.city, 'title')
|
self.city = RawData.get_str_st(data.city, 'title')
|
||||||
self.home_town = data.home_town
|
self.home_town = data.get_str('home_town')
|
||||||
self.status = data.status
|
self.status = data.get_str('status')
|
||||||
self.site = data.site is not None
|
self.site = data.site is not None
|
||||||
self.facebook = data.facebook is not None
|
self.facebook = data.facebook is not None
|
||||||
self.twitter = data.twitter is not None
|
self.twitter = data.twitter is not None
|
||||||
self.instagram = data.instagram is not None
|
self.instagram = data.instagram is not None
|
||||||
self.mobile_phone = data.mobile_phone is not None
|
self.mobile_phone = data.mobile_phone is not None
|
||||||
self.activities = data.activities
|
self.activities = data.get_str('activities')
|
||||||
self.interests = data.interests
|
self.interests = data.get_str('interests')
|
||||||
self.books = data.books
|
self.books = data.get_str('books')
|
||||||
self.movies = data.movies
|
self.movies = data.get_str('movies')
|
||||||
self.music = data.music
|
self.music = data.get_str('music')
|
||||||
self.tv = data.tv
|
self.tv = data.get_str('tv')
|
||||||
self.games = data.games
|
self.games = data.get_str('games')
|
||||||
self.quotes = data.quotes
|
self.quotes = data.get_str('quotes')
|
||||||
self.about = data.about
|
self.about = data.get_str('about')
|
||||||
personal = None
|
personal = None
|
||||||
if data.personal is not None:
|
if data.personal is not None:
|
||||||
if len(data.personal) == 1:
|
if len(data.personal) == 1:
|
||||||
personal = data.personal[0]
|
personal = data.personal[0]
|
||||||
if len(data.personal) > 1:
|
if len(data.personal) > 1:
|
||||||
raise Exception(f'PERSONAL {data.id}')
|
raise Exception(f'PERSONAL {data.id}')
|
||||||
self.political = data.get_int(personal, 'political')
|
self.political = RawData.get_int_st(personal, 'political')
|
||||||
self.religion = data.get_str(personal, 'religion')
|
self.religion = RawData.get_str_st(personal, 'religion')
|
||||||
self.inspired_by = data.get_str(personal, 'inspired_by')
|
self.inspired_by = RawData.get_str_st(personal, 'inspired_by')
|
||||||
self.people_main = data.get_int(personal, 'people_main')
|
self.people_main = RawData.get_int_st(personal, 'people_main')
|
||||||
self.life_main = data.get_int(personal, 'life_main')
|
self.life_main = RawData.get_int_st(personal, 'life_main')
|
||||||
self.smoking = data.get_int(personal, 'smoking')
|
self.smoking = RawData.get_int_st(personal, 'smoking')
|
||||||
self.alcohol = data.get_int(personal, 'alcohol')
|
self.alcohol = RawData.get_int_st(personal, 'alcohol')
|
||||||
self.relatives = Person.__collection(data.relatives, lambda item: item['type'])
|
self.relatives = RawData.get_collection_st(data.relatives, lambda item: item['type'])
|
||||||
self.occupation_type = data.get_str(data.occupation, 'type')
|
self.occupation_type = RawData.get_str_st(data.occupation, 'type')
|
||||||
self.occupation_place_id = data.get_int(data.occupation, 'id')
|
self.occupation_place_name = RawData.get_str_st(data.occupation, 'name')
|
||||||
self.occupation_place_name = data.get_str(data.occupation, 'name')
|
self.universities = RawData.get_collection_st(data.universities, lambda item: University(item).__dict__)
|
||||||
self.universities = Person.__collection(data.universities, lambda item: University(item).__dict__)
|
self.schools = RawData.get_collection_st(data.schools, lambda item: School(item).__dict__)
|
||||||
self.schools = Person.__collection(data.schools, lambda item: School(item).__dict__)
|
self.career = RawData.get_collection_st(data.career, lambda item: Career(item).__dict__)
|
||||||
self.career = Person.__collection(data.career, lambda item: Career(item).__dict__)
|
self.military = RawData.get_collection_st(data.military, lambda item: Military(item).__dict__)
|
||||||
self.military = Person.__collection(data.military, lambda item: Military(item).__dict__)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def __collection(collection, function):
|
|
||||||
return list(map(lambda item: function(item), [] if collection is None else collection))
|
|
||||||
|
@ -5,22 +5,37 @@ class RawData:
|
|||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
self.__dict__.update(data)
|
self.__dict__.update(data)
|
||||||
|
|
||||||
|
def get_str(self, attr):
|
||||||
|
return RawData.get_str_st(self.__dict__, attr)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_str(value, attr):
|
def get_str_st(value, attr):
|
||||||
if value is None:
|
if value is None:
|
||||||
return ''
|
return ''
|
||||||
return '' if value is None else value[attr]
|
result = value[attr]
|
||||||
|
if result is None:
|
||||||
|
return ''
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_int(self, attr):
|
||||||
|
return RawData.get_int_st(self.__dict__, attr)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_int(value, attr):
|
def get_int_st(value, attr):
|
||||||
if value is None:
|
if value is None:
|
||||||
return -1
|
return -1
|
||||||
return -1 if value is None else value[attr]
|
result = value[attr]
|
||||||
|
if result is None:
|
||||||
|
return -1
|
||||||
|
if not str(result).isnumeric():
|
||||||
|
print(f'The value {result} is not a number')
|
||||||
|
return -1
|
||||||
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_date(value):
|
def get_date_st(value):
|
||||||
if value is None:
|
if value is None:
|
||||||
return None
|
return ''
|
||||||
try:
|
try:
|
||||||
return datetime.strptime(value, '%d.%m.%Y').date()
|
return datetime.strptime(value, '%d.%m.%Y').date()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@ -28,4 +43,8 @@ class RawData:
|
|||||||
return datetime.strptime(value, '%d.%m.%y').date()
|
return datetime.strptime(value, '%d.%m.%y').date()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print(f'Invalid date {value}')
|
print(f'Invalid date {value}')
|
||||||
return None
|
return ''
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_collection_st(collection, function):
|
||||||
|
return list(map(lambda item: function(item), [] if collection is None else collection))
|
||||||
|
@ -4,15 +4,15 @@ from src.raw_data import RawData
|
|||||||
class School:
|
class School:
|
||||||
def __init__(self, raw_school):
|
def __init__(self, raw_school):
|
||||||
data = RawData(raw_school)
|
data = RawData(raw_school)
|
||||||
self.id = data.id
|
self.id = data.get_int('id')
|
||||||
self.name = data.name
|
self.name = data.get_str('name')
|
||||||
self.country_id = data.get_int(data.country, 'id')
|
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||||
self.city_id = data.get_int(data.city, 'id')
|
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||||
self.year_from = data.year_from
|
self.year_from = data.get_int('year_from')
|
||||||
self.year_to = data.year_to
|
self.year_to = data.get_int('year_to')
|
||||||
self.year_graduated = data.year_graduated
|
self.year_graduated = data.get_int('year_graduated')
|
||||||
self.speciality = data.speciality
|
self.speciality = data.get_str('speciality')
|
||||||
self.type_id = data.type
|
self.type_id = data.get_int('type')
|
||||||
self.type = data.type_str
|
self.type = data.get_str('type_str')
|
||||||
self.litera = raw_school['class']
|
self.litera_id = data.get_int('class_id')
|
||||||
self.litera_id = data.class_id
|
self.litera = data.get_str('class')
|
||||||
|
@ -4,16 +4,16 @@ from src.raw_data import RawData
|
|||||||
class University:
|
class University:
|
||||||
def __init__(self, raw_university):
|
def __init__(self, raw_university):
|
||||||
data = RawData(raw_university)
|
data = RawData(raw_university)
|
||||||
self.id = data.id
|
self.id = data.get_int('id')
|
||||||
self.name = data.name
|
self.name = data.get_str('name')
|
||||||
self.country_id = data.get_int(data.country, 'id')
|
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||||
self.city_id = data.get_int(data.city, 'id')
|
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||||
self.faculty_id = data.faculty
|
self.faculty_id = data.get_int('faculty')
|
||||||
self.faculty = data.faculty_name
|
self.faculty = data.get_str('faculty_name')
|
||||||
self.chair_id = data.chair
|
self.chair_id = data.get_int('chair')
|
||||||
self.chair = data.chair_name
|
self.chair = data.get_str('chair_name')
|
||||||
self.graduation = data.graduation
|
self.graduation = data.get_int('graduation')
|
||||||
self.form_id = data.education_form_id
|
self.form_id = data.get_int('education_form_id')
|
||||||
self.form = data.education_form
|
self.form = data.get_str('education_form')
|
||||||
self.status_id = data.education_status_id
|
self.status_id = data.get_int('education_status_id')
|
||||||
self.status = data.education_status
|
self.status = data.get_str('education_status')
|
||||||
|
Loading…
Reference in New Issue
Block a user