Improve models for the prepare_dataset script to minimize null data
This commit is contained in:
parent
3a51a23048
commit
86fbf158f0
@ -4,11 +4,11 @@ from src.raw_data import RawData
|
||||
class Career:
|
||||
def __init__(self, raw_career):
|
||||
data = RawData(raw_career)
|
||||
self.id = data.id
|
||||
self.group_id = data.group_id
|
||||
self.company = data.company
|
||||
self.country_id = data.country_id
|
||||
self.city_id = data.city_id
|
||||
self.year_from = raw_career['from']
|
||||
self.until = data.until
|
||||
self.position = data.position
|
||||
self.id = data.get_int('id')
|
||||
self.group_id = data.get_int('group_id')
|
||||
self.company = data.get_str('company')
|
||||
self.country_id = data.get_int('country_id')
|
||||
self.city_id = data.get_int('city_id')
|
||||
self.year_from = data.get_int('from')
|
||||
self.until = data.get_int('until')
|
||||
self.position = data.get_str('position')
|
||||
|
@ -4,9 +4,9 @@ from src.raw_data import RawData
|
||||
class Military:
|
||||
def __init__(self, raw_military):
|
||||
data = RawData(raw_military)
|
||||
self.id = data.id
|
||||
self.unit_id = data.unit_id
|
||||
self.unit = data.unit
|
||||
self.country_id = data.country_id
|
||||
self.year_from = raw_military['from']
|
||||
self.until = data.until
|
||||
self.id = data.get_int('id')
|
||||
self.unit_id = data.get_int('unit_id')
|
||||
self.unit = data.get_str('unit')
|
||||
self.country_id = data.get_int('country_id')
|
||||
self.year_from = data.get_int('from')
|
||||
self.until = data.get_int('until')
|
||||
|
@ -13,51 +13,46 @@ class Person:
|
||||
self.deactivated = data.deactivated is not None
|
||||
self.has_photo = data.has_photo == 1
|
||||
self.followers_count = data.followers_count
|
||||
self.sex = data.sex
|
||||
self.bdate = data.get_date(data.bdate)
|
||||
self.relation = data.relation
|
||||
self.country_id = data.get_int(data.country, 'id')
|
||||
self.country = data.get_int(data.country, 'title')
|
||||
self.city_id = data.get_int(data.city, 'id')
|
||||
self.city = data.get_int(data.city, 'title')
|
||||
self.home_town = data.home_town
|
||||
self.status = data.status
|
||||
self.sex = data.get_int('sex')
|
||||
self.bdate = RawData.get_date_st(data.bdate)
|
||||
self.relation = data.get_int('relation')
|
||||
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||
self.country = RawData.get_str_st(data.country, 'title')
|
||||
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||
self.city = RawData.get_str_st(data.city, 'title')
|
||||
self.home_town = data.get_str('home_town')
|
||||
self.status = data.get_str('status')
|
||||
self.site = data.site is not None
|
||||
self.facebook = data.facebook is not None
|
||||
self.twitter = data.twitter is not None
|
||||
self.instagram = data.instagram is not None
|
||||
self.mobile_phone = data.mobile_phone is not None
|
||||
self.activities = data.activities
|
||||
self.interests = data.interests
|
||||
self.books = data.books
|
||||
self.movies = data.movies
|
||||
self.music = data.music
|
||||
self.tv = data.tv
|
||||
self.games = data.games
|
||||
self.quotes = data.quotes
|
||||
self.about = data.about
|
||||
self.activities = data.get_str('activities')
|
||||
self.interests = data.get_str('interests')
|
||||
self.books = data.get_str('books')
|
||||
self.movies = data.get_str('movies')
|
||||
self.music = data.get_str('music')
|
||||
self.tv = data.get_str('tv')
|
||||
self.games = data.get_str('games')
|
||||
self.quotes = data.get_str('quotes')
|
||||
self.about = data.get_str('about')
|
||||
personal = None
|
||||
if data.personal is not None:
|
||||
if len(data.personal) == 1:
|
||||
personal = data.personal[0]
|
||||
if len(data.personal) > 1:
|
||||
raise Exception(f'PERSONAL {data.id}')
|
||||
self.political = data.get_int(personal, 'political')
|
||||
self.religion = data.get_str(personal, 'religion')
|
||||
self.inspired_by = data.get_str(personal, 'inspired_by')
|
||||
self.people_main = data.get_int(personal, 'people_main')
|
||||
self.life_main = data.get_int(personal, 'life_main')
|
||||
self.smoking = data.get_int(personal, 'smoking')
|
||||
self.alcohol = data.get_int(personal, 'alcohol')
|
||||
self.relatives = Person.__collection(data.relatives, lambda item: item['type'])
|
||||
self.occupation_type = data.get_str(data.occupation, 'type')
|
||||
self.occupation_place_id = data.get_int(data.occupation, 'id')
|
||||
self.occupation_place_name = data.get_str(data.occupation, 'name')
|
||||
self.universities = Person.__collection(data.universities, lambda item: University(item).__dict__)
|
||||
self.schools = Person.__collection(data.schools, lambda item: School(item).__dict__)
|
||||
self.career = Person.__collection(data.career, lambda item: Career(item).__dict__)
|
||||
self.military = Person.__collection(data.military, lambda item: Military(item).__dict__)
|
||||
|
||||
@staticmethod
|
||||
def __collection(collection, function):
|
||||
return list(map(lambda item: function(item), [] if collection is None else collection))
|
||||
self.political = RawData.get_int_st(personal, 'political')
|
||||
self.religion = RawData.get_str_st(personal, 'religion')
|
||||
self.inspired_by = RawData.get_str_st(personal, 'inspired_by')
|
||||
self.people_main = RawData.get_int_st(personal, 'people_main')
|
||||
self.life_main = RawData.get_int_st(personal, 'life_main')
|
||||
self.smoking = RawData.get_int_st(personal, 'smoking')
|
||||
self.alcohol = RawData.get_int_st(personal, 'alcohol')
|
||||
self.relatives = RawData.get_collection_st(data.relatives, lambda item: item['type'])
|
||||
self.occupation_type = RawData.get_str_st(data.occupation, 'type')
|
||||
self.occupation_place_name = RawData.get_str_st(data.occupation, 'name')
|
||||
self.universities = RawData.get_collection_st(data.universities, lambda item: University(item).__dict__)
|
||||
self.schools = RawData.get_collection_st(data.schools, lambda item: School(item).__dict__)
|
||||
self.career = RawData.get_collection_st(data.career, lambda item: Career(item).__dict__)
|
||||
self.military = RawData.get_collection_st(data.military, lambda item: Military(item).__dict__)
|
||||
|
@ -5,22 +5,37 @@ class RawData:
|
||||
def __init__(self, data):
|
||||
self.__dict__.update(data)
|
||||
|
||||
def get_str(self, attr):
|
||||
return RawData.get_str_st(self.__dict__, attr)
|
||||
|
||||
@staticmethod
|
||||
def get_str(value, attr):
|
||||
def get_str_st(value, attr):
|
||||
if value is None:
|
||||
return ''
|
||||
return '' if value is None else value[attr]
|
||||
result = value[attr]
|
||||
if result is None:
|
||||
return ''
|
||||
return result
|
||||
|
||||
def get_int(self, attr):
|
||||
return RawData.get_int_st(self.__dict__, attr)
|
||||
|
||||
@staticmethod
|
||||
def get_int(value, attr):
|
||||
def get_int_st(value, attr):
|
||||
if value is None:
|
||||
return -1
|
||||
return -1 if value is None else value[attr]
|
||||
result = value[attr]
|
||||
if result is None:
|
||||
return -1
|
||||
if not str(result).isnumeric():
|
||||
print(f'The value {result} is not a number')
|
||||
return -1
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def get_date(value):
|
||||
def get_date_st(value):
|
||||
if value is None:
|
||||
return None
|
||||
return ''
|
||||
try:
|
||||
return datetime.strptime(value, '%d.%m.%Y').date()
|
||||
except ValueError:
|
||||
@ -28,4 +43,8 @@ class RawData:
|
||||
return datetime.strptime(value, '%d.%m.%y').date()
|
||||
except ValueError:
|
||||
print(f'Invalid date {value}')
|
||||
return None
|
||||
return ''
|
||||
|
||||
@staticmethod
|
||||
def get_collection_st(collection, function):
|
||||
return list(map(lambda item: function(item), [] if collection is None else collection))
|
||||
|
@ -4,15 +4,15 @@ from src.raw_data import RawData
|
||||
class School:
|
||||
def __init__(self, raw_school):
|
||||
data = RawData(raw_school)
|
||||
self.id = data.id
|
||||
self.name = data.name
|
||||
self.country_id = data.get_int(data.country, 'id')
|
||||
self.city_id = data.get_int(data.city, 'id')
|
||||
self.year_from = data.year_from
|
||||
self.year_to = data.year_to
|
||||
self.year_graduated = data.year_graduated
|
||||
self.speciality = data.speciality
|
||||
self.type_id = data.type
|
||||
self.type = data.type_str
|
||||
self.litera = raw_school['class']
|
||||
self.litera_id = data.class_id
|
||||
self.id = data.get_int('id')
|
||||
self.name = data.get_str('name')
|
||||
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||
self.year_from = data.get_int('year_from')
|
||||
self.year_to = data.get_int('year_to')
|
||||
self.year_graduated = data.get_int('year_graduated')
|
||||
self.speciality = data.get_str('speciality')
|
||||
self.type_id = data.get_int('type')
|
||||
self.type = data.get_str('type_str')
|
||||
self.litera_id = data.get_int('class_id')
|
||||
self.litera = data.get_str('class')
|
||||
|
@ -4,16 +4,16 @@ from src.raw_data import RawData
|
||||
class University:
|
||||
def __init__(self, raw_university):
|
||||
data = RawData(raw_university)
|
||||
self.id = data.id
|
||||
self.name = data.name
|
||||
self.country_id = data.get_int(data.country, 'id')
|
||||
self.city_id = data.get_int(data.city, 'id')
|
||||
self.faculty_id = data.faculty
|
||||
self.faculty = data.faculty_name
|
||||
self.chair_id = data.chair
|
||||
self.chair = data.chair_name
|
||||
self.graduation = data.graduation
|
||||
self.form_id = data.education_form_id
|
||||
self.form = data.education_form
|
||||
self.status_id = data.education_status_id
|
||||
self.status = data.education_status
|
||||
self.id = data.get_int('id')
|
||||
self.name = data.get_str('name')
|
||||
self.country_id = RawData.get_int_st(data.country, 'id')
|
||||
self.city_id = RawData.get_int_st(data.city, 'id')
|
||||
self.faculty_id = data.get_int('faculty')
|
||||
self.faculty = data.get_str('faculty_name')
|
||||
self.chair_id = data.get_int('chair')
|
||||
self.chair = data.get_str('chair_name')
|
||||
self.graduation = data.get_int('graduation')
|
||||
self.form_id = data.get_int('education_form_id')
|
||||
self.form = data.get_str('education_form')
|
||||
self.status_id = data.get_int('education_status_id')
|
||||
self.status = data.get_str('education_status')
|
||||
|
Loading…
Reference in New Issue
Block a user