#!/usr/bin/env python3
"""Filter a JSON dump of persons and save the kept records as a JSON table.

Usage: run the script with exactly one argument, the path to the JSON file.
The result is written to ``<input base name>.private.json`` in the current
working directory.
"""
import json
import os
import sys

import pandas as pd

from src.prepare_dataset.person import Person


def __main(json_file_name: str) -> None:
    """Convert the records in *json_file_name* into a filtered JSON table.

    Each item of the loaded JSON document is wrapped in ``Person``. Persons
    whose ``is_closed`` or ``deactivated`` attribute is truthy are skipped;
    the attribute dictionaries of the remaining persons become the rows of a
    ``pandas.DataFrame``. The ``is_closed`` and ``deactivated`` columns are
    dropped before the frame is serialized with ``DataFrame.to_json``.

    Args:
        json_file_name: Path to the input JSON file (read as UTF-8).

    Raises:
        Exception: If no person is left after filtering ("No data").
    """
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(json_file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)

    persons = []
    for item in data:
        person = Person(item)
        if person.is_closed or person.deactivated:
            continue
        persons.append(person.__dict__)

    if not persons:
        raise Exception("No data")

    # The columns are taken from the first record; every record is expected
    # to expose the same keys (a missing key raises KeyError).
    df = pd.DataFrame({
        key: pd.Series([person[key] for person in persons])
        for key in persons[0]
    })
    df = df.drop(columns=['is_closed', 'deactivated'])

    # Keep only the file's base name (no directory, no extension) so the
    # output lands in the current working directory on any platform.
    filename = os.path.splitext(os.path.basename(json_file_name))[0]
    df.to_json(f'{filename}.private.json')


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('You must specify the VK data in json')
        sys.exit(1)
    if not os.path.isfile(sys.argv[1]):
        print(f'File {sys.argv[1]} is not exists')
        # Stop here instead of falling through to open() on a missing file.
        sys.exit(1)
    __main(sys.argv[1])