social-clusters/prepare_dataset.py

50 lines
1.1 KiB
Python

#!/usr/bin/env python3
import json
import os
import sys
import pandas as pd
from src.prepare_dataset.person import Person
def __main(json_file_name):
    """Convert a JSON dump of person records into a filtered ``*.private.json``.

    Each item of the top-level JSON collection is wrapped in ``Person``.
    Records whose ``is_closed`` or ``deactivated`` attribute is truthy are
    skipped. The instance attributes of the remaining records become the
    columns of a DataFrame, the ``is_closed`` and ``deactivated`` columns are
    dropped, and the result is written with ``DataFrame.to_json``.

    Args:
        json_file_name: Path to the input JSON file (read as UTF-8).

    Raises:
        ValueError: If no record is left after filtering.
    """
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(json_file_name, encoding='utf-8') as json_file:
        data = json.load(json_file)

    persons = []
    for item in data:
        person = Person(item)
        if person.is_closed or person.deactivated:
            continue
        persons.append(vars(person))

    if not persons:
        raise ValueError("No data")

    # The first record defines the column set; a later record lacking one of
    # these keys raises KeyError rather than being silently padded.
    df = pd.DataFrame({
        key: pd.Series([record[key] for record in persons])
        for key in persons[0]
    })
    df = df.drop(columns=['is_closed', 'deactivated'])

    # Only the base name is kept, so the output lands in the current working
    # directory. os.path.basename understands the platform's path separators.
    pathname, _ = os.path.splitext(json_file_name)
    filename = os.path.basename(pathname)
    df.to_json(f'{filename}.private.json')
if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, the path to the
    # JSON file with VK data.
    if len(sys.argv) != 2:
        print('You must specify the VK data in json')
        sys.exit(1)
    if not os.path.isfile(sys.argv[1]):
        print(f'File {sys.argv[1]} is not exists')
        # Stop here; otherwise the conversion would run on a missing file and
        # crash with a traceback right after this message.
        sys.exit(1)
    __main(sys.argv[1])