50 lines
1.1 KiB
Python
50 lines
1.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
import pandas as pd
|
|
|
|
from src.prepare_dataset.person import Person
|
|
|
|
|
|
def __main(json_file_name):
    """Filter the persons of a JSON file and dump the rest to a new JSON file.

    Loads ``json_file_name``, wraps every item in a ``Person`` and skips the
    ones whose ``is_closed`` or ``deactivated`` attribute is truthy. The
    instance attributes of the remaining persons become the rows of a pandas
    DataFrame, which is written (without the two flag columns) to
    ``<input base name>.private.json`` in the current working directory.

    Args:
        json_file_name: Path to a UTF-8 encoded JSON file whose top-level
            value is iterable; each item is passed to ``Person``.

    Raises:
        Exception: If no person is left after filtering ("No data").
    """
    # The context manager closes the handle even if JSON parsing fails.
    with open(json_file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)

    persons = []
    for item in data:
        person = Person(item)
        if person.is_closed or person.deactivated:
            continue
        persons.append(vars(person))

    if not persons:
        raise Exception("No data")

    # The first record defines the column set; every other record must
    # provide the same keys (a missing key raises KeyError).
    df = pd.DataFrame({
        key: pd.Series([person[key] for person in persons])
        for key in persons[0]
    })
    df = df.drop(columns=['is_closed', 'deactivated'])

    # basename understands the platform's path separators, unlike a manual
    # split on '/'.
    filename = os.path.splitext(os.path.basename(json_file_name))[0]
    df.to_json(f'{filename}.private.json')
|
|
|
|
|
|
if __name__ == '__main__':
    # Exactly one command-line argument is expected: the path to the JSON
    # input file.
    if len(sys.argv) != 2:
        print('You must specify the VK data in json')
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module.
        sys.exit(1)
    if not os.path.isfile(sys.argv[1]):
        print(f'File {sys.argv[1]} is not exists')
        # Stop here instead of falling through to __main, which would fail
        # when it tries to open the missing file.
        sys.exit(1)
    __main(sys.argv[1])
|