Skip to content

Instantly share code, notes, and snippets.

Last active February 11, 2019 19:04
Show Gist options
  • Save lucahammer/216a2f65e28a2863ddfa69ec0384e4d7 to your computer and use it in GitHub Desktop.
import json_lines
import pprint
# Module-level pretty-printer configured with indent=1. None of the functions
# visible in this file reference it; presumably kept for interactive
# inspection of company records -- TODO confirm before removing.
pp = pprint.PrettyPrinter(indent=1)
def get_companies(line_number=0, lines=1):
    """Return a window of companies from the local JSON-lines dump as a list.

    The file is read sequentially from the beginning, so the higher the
    starting line, the longer the call takes.

    Args:
        line_number: Zero-based index of the first record to return.
        lines: Maximum number of records to return.

    Returns:
        A list with up to ``lines`` parsed company records, starting at
        ``line_number``. The list is shorter (possibly empty) when the file
        ends before the window is exhausted.
    """
    companies = []
    max_line = line_number + lines
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for current_line, company in enumerate(json_lines.reader(f)):
            if current_line >= max_line:
                # Past the requested window: stop reading the (large) file.
                break
            if current_line >= line_number:
                companies.append(company)
    return companies
def get_companies_and_officers():
    """Collect all currently registered companies and their active officers.

    Only companies whose ``current_status`` is ``'currently registered'`` are
    considered, and only officers without an ``end_date`` are counted.

    Returns:
        A dict with two entries:

        * ``'allcompanies'``: maps company number -> company name.
        * ``'allofficers'``: maps officer name -> list of company numbers
          the officer is attached to.
    """
    data = {'allofficers': {},  # {name: ['company 1', 'company 2']}
            'allcompanies': {}}
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for company in json_lines.reader(f):
            if company.get('current_status') != 'currently registered':
                continue
            data['allcompanies'][company['company_number']] = company['name']
            for officer in company.get('officers', ()):
                if 'end_date' in officer:
                    # Officer has left the company; skip.
                    continue
                # Accumulate every company per officer instead of replacing
                # the list each time the same name is seen again.
                data['allofficers'].setdefault(officer['name'], []).append(
                    company['company_number'])
    return data
def create_companies_network(data):
    """Write a .gdf file with companies connected by shared officers.

    Args:
        data: Dict with ``'allcompanies'`` (company number -> name) and
            ``'allofficers'`` (officer name -> list of company numbers),
            i.e. the shape built by ``get_companies_and_officers``.

    The file ``local_data/offeneregister-companies.gdf`` is overwritten. Each
    company becomes a node; for every officer, each pair of distinct company
    numbers in that officer's list becomes one edge line.
    """
    with open('local_data/offeneregister-companies.gdf', 'w', encoding='utf-8') as output:
        output.write('nodedef>name VARCHAR,label VARCHAR\n')
        for company, name in data['allcompanies'].items():
            # Commas and newlines would break the comma-separated GDF rows.
            label = name.replace(',', 'COMMA').replace('\n', 'NEWLINE')
            output.write('{0},{1}\n'.format(company, label))

        output.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
        for companies in data['allofficers'].values():
            for i, company in enumerate(companies):
                for other in companies[i + 1:]:
                    # The same company may be listed twice for one officer;
                    # never emit a self-loop.
                    if company != other:
                        output.write('{0},{1}\n'.format(company, other))
    print('Companies network created.')
def create_officers_network():
    """Write a .gdf file with officers connected by their shared companies.

    Reads ``local_data/de_companies_ocdata.jsonl`` and overwrites
    ``local_data/offeneregister-officers.gdf``. For every currently
    registered company, each ordered pair of active officers (no
    ``end_date``) with different names yields one edge line, so every
    connection appears in both directions. No explicit node rows are
    written; the node header is followed directly by the edge header.
    """
    def _escape(text):
        # Commas and newlines would break the comma-separated GDF rows.
        return text.replace(',', 'COMMA').replace('\n', 'NEWLINE')

    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        with open('local_data/offeneregister-officers.gdf', 'w', encoding='utf-8') as output:
            output.write('nodedef>name VARCHAR,label VARCHAR\n')
            output.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
            for company in json_lines.reader(f):
                if company.get('current_status') != 'currently registered':
                    continue
                if 'officers' not in company:
                    continue
                for officer_a in company['officers']:
                    if 'end_date' in officer_a:
                        continue
                    for officer_b in company['officers']:
                        if 'end_date' in officer_b:
                            continue
                        if officer_a['name'] != officer_b['name']:
                            output.write('{0},{1}\n'.format(
                                _escape(officer_a['name']),
                                _escape(officer_b['name'])))
    print('Officers network created.')
def create_officers_companies_network(registrar='München'):
    """Write a .gdf file with companies and officers connected to each other.

    Only currently registered companies of the given registrar are included,
    and only officers without an ``end_date``.

    Args:
        registrar: Value compared against
            ``company['all_attributes']['registrar']``; also used in the
            output file name
            ``local_data/offeneregister-combined-<registrar>.gdf``.
    """
    def _escape(text):
        # Commas and newlines would break the comma-separated GDF rows.
        return text.replace(',', 'COMMA').replace('\n', 'NEWLINE')

    nodes = []
    edges = []
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for company in json_lines.reader(f):
            # Records lacking the registrar attribute are skipped rather
            # than raising KeyError.
            if company.get('all_attributes', {}).get('registrar') != registrar:
                continue
            if company.get('current_status') != 'currently registered':
                continue
            nodes.append('{0},{1}\n'.format(
                company['company_number'],
                _escape(company['name'])))
            for officer in company.get('officers', ()):
                if 'end_date' in officer:
                    continue
                officer_name = _escape(officer['name'])
                # NOTE(review): the officer node row and the value of the
                # `directed` column are reconstructed -- confirm against the
                # intended GDF layout.
                nodes.append('{0},{0}\n'.format(officer_name))
                edges.append('{0},{1},true\n'.format(
                    officer_name,
                    company['company_number']))

    with open('local_data/offeneregister-combined-{0}.gdf'.format(registrar), 'w', encoding='utf-8') as output:
        output.write('nodedef>name VARCHAR,label VARCHAR\n')
        for node in nodes:
            output.write(node)
        output.write('edgedef>node1 VARCHAR,node2 VARCHAR,directed BOOLEAN\n')
        for edge in edges:
            output.write(edge)
    print('Combined network created.')
#data = get_companies_and_officers()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment