Last active
February 11, 2019 19:04
-
-
Save lucahammer/216a2f65e28a2863ddfa69ec0384e4d7 to your computer and use it in GitHub Desktop.
Blogpost: https://lucahammer.com/2019/02/11/network-visualization-of-every-registered-company-in-germany/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json_lines | |
import pprint | |
# Shared pretty-printer for inspecting individual company records by hand.
pp = pprint.PrettyPrinter(indent=1)
def get_companies(line_number=0, lines=1):
    """
    Returns as many companies as you want as a list.

    Records are read sequentially from
    'local_data/de_companies_ocdata.jsonl', so the higher the
    starting line, the longer it takes.

    Args:
        line_number: Zero-based index of the first record to return.
        lines: Maximum number of records to return.

    Returns:
        A list with up to ``lines`` records. The list is shorter
        (possibly empty) when the file ends before the requested
        window is full.
    """
    companies = []
    max_line = line_number + lines
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for current_line, company in enumerate(json_lines.reader(f)):
            if current_line >= max_line:
                # Window is complete; no need to scan the rest of the dump.
                break
            if current_line >= line_number:
                companies.append(company)
    # Return the list on every path, including when the file ended
    # before max_line was reached (previously this fell through to None).
    return companies
def get_companies_and_officers():
    '''
    Scans the whole dump and collects every company whose
    current_status is 'currently registered', together with
    its officers that carry no 'end_date' entry.

    Returns a dict with two mappings:
    'allcompanies': company_number -> company name
    'allofficers':  officer name -> list of company_numbers
    '''
    company_names = {}
    officer_companies = {}  # {name: ['company 1', 'company 2']}
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for record in json_lines.reader(f):
            if record.get('current_status') != 'currently registered':
                continue
            number = record['company_number']
            company_names[number] = record['name']
            for officer in record.get('officers', ()):
                # Entries with an end_date are skipped (presumably officers
                # who have left the company).
                if 'end_date' in officer:
                    continue
                # Officers are keyed by name only, so identical names are
                # merged into a single entry.
                officer_companies.setdefault(officer['name'], []).append(number)
    return {'allofficers': officer_companies, 'allcompanies': company_names}
def create_companies_network(data):
    '''
    Writes a .gdf with companies
    connected by officers.

    data is a dict with two entries:
    'allcompanies': company_number -> company name (one node each)
    'allofficers':  officer name -> list of company_numbers

    Every pair of distinct companies sharing an officer gets one
    edge per shared officer. The result is written to
    'local_data/offeneregister-companies.gdf'; nothing is returned.
    '''
    # Imported locally so the file-level import block stays untouched.
    from itertools import combinations

    with open('local_data/offeneregister-companies.gdf', 'w', encoding='utf-8') as output:
        output.write('nodedef>name VARCHAR,label VARCHAR\n')
        for number, name in data['allcompanies'].items():
            # GDF rows are comma separated and newline terminated, so both
            # characters are replaced inside the free-text label.
            label = name.replace(',', 'COMMA').replace('\n', 'NEWLINE')
            output.write('{0},{1}\n'.format(number, label))
        output.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
        for companies in data['allofficers'].values():
            # The same company can appear several times in one officer's
            # list. De-duplicate (keeping first-seen order) so one officer
            # yields at most one edge per company pair and no self-loops.
            unique_companies = list(dict.fromkeys(companies))
            for first, second in combinations(unique_companies, 2):
                output.write('{0},{1}\n'.format(first, second))
    print('Companies network created.')
def create_officers_network():
    '''
    Writes a .gdf with officers
    connected by their companies.

    For every currently registered company, each pair of its
    officers without an 'end_date' and with different names is
    written as an edge. The node section is left empty; officers
    only appear in the edge list.
    '''
    def gdf_safe(text):
        # Commas and newlines are GDF delimiters; replace them in names.
        return text.replace(',', 'COMMA').replace('\n', 'NEWLINE')

    source_path = 'local_data/de_companies_ocdata.jsonl'
    target_path = 'local_data/offeneregister-officers.gdf'
    with open(source_path, 'rb') as f:
        with open(target_path, 'w', encoding='utf-8') as output:
            output.write('nodedef>name VARCHAR,label VARCHAR\n')
            output.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
            for company in json_lines.reader(f):
                if company.get('current_status') != 'currently registered':
                    continue
                if 'officers' not in company:
                    continue
                active = [entry for entry in company['officers']
                          if 'end_date' not in entry]
                # NOTE(review): both loops run over the full list, so every
                # pair is emitted in both orders (A,B and B,A).
                for officer_a in active:
                    for officer_b in active:
                        if officer_a['name'] == officer_b['name']:
                            continue
                        output.write('{0},{1}\n'.format(
                            gdf_safe(officer_a['name']),
                            gdf_safe(officer_b['name'])))
    print('Officers network created.')
def create_officers_companies_network(registrar='München'):
    '''
    Writes a .gdf with companies and
    officers connected by each other.

    Only currently registered companies whose
    all_attributes.registrar equals `registrar` are included.
    Companies become nodes; each officer without an 'end_date'
    becomes a directed edge officer name -> company number.
    Officers are not listed in the node section.

    The result is written to
    'local_data/offeneregister-combined-<registrar>.gdf';
    nothing is returned.
    '''
    def gdf_safe(text):
        # Commas and newlines are GDF delimiters; replace them in free text.
        return text.replace(',', 'COMMA').replace('\n', 'NEWLINE')

    nodes = []
    edges = []
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for company in json_lines.reader(f):
            # Records without registrar information cannot match the filter;
            # skip them instead of aborting the whole run with a KeyError.
            attributes = company.get('all_attributes') or {}
            if 'registrar' not in attributes or attributes['registrar'] != registrar:
                continue
            if company.get('current_status') != 'currently registered':
                continue
            number = company['company_number']
            nodes.append('{0},{1}\n'.format(number, gdf_safe(company['name'])))
            for officer in company.get('officers', ()):
                if 'end_date' in officer:
                    continue
                edges.append('{0},{1},TRUE\n'.format(
                    gdf_safe(officer['name']), number))
    # All nodes must precede the edge section, hence the two buffered lists.
    with open('local_data/offeneregister-combined-{0}.gdf'.format(registrar), 'w', encoding='utf-8') as output:
        output.write('nodedef>name VARCHAR,label VARCHAR\n')
        output.writelines(nodes)
        output.write('edgedef>node1 VARCHAR,node2 VARCHAR,directed BOOLEAN\n')
        output.writelines(edges)
    print('Combined network created.')
# Alternative steps, kept for reference; uncomment the one you want to run.
#pp.pprint(get_companies(0,2))
#data = get_companies_and_officers()
#create_officers_network()
#create_companies_network(data)

# Build the combined officer/company network for the Köln registrar.
create_officers_companies_network(registrar='Köln')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment