Skip to content

Instantly share code, notes, and snippets.

@lucahammer
Last active February 11, 2019 19:04
Show Gist options
  • Save lucahammer/216a2f65e28a2863ddfa69ec0384e4d7 to your computer and use it in GitHub Desktop.
Save lucahammer/216a2f65e28a2863ddfa69ec0384e4d7 to your computer and use it in GitHub Desktop.
import json_lines
import pprint
# Module-level pretty-printer for ad-hoc inspection of company records;
# used by the commented-out pp.pprint(...) example at the end of the file.
pp = pprint.PrettyPrinter(indent=1)
def get_companies(line_number=0, lines=1):
    """
    Return a slice of the company records as a list.

    Reads 'local_data/de_companies_ocdata.jsonl' from the beginning, skips
    the first ``line_number`` records and collects up to ``lines`` records
    after that. Skipped records still have to be read, so the higher the
    starting line, the longer it takes.

    A list is always returned: when the file ends before ``lines`` records
    were collected, the records found so far (possibly none) are returned.
    """
    companies = []
    max_line = line_number + lines
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for current_line, company in enumerate(json_lines.reader(f)):
            if current_line >= max_line:
                break
            if current_line >= line_number:
                companies.append(company)
    # Return after the loop, not only from inside it: a file shorter than
    # max_line would otherwise fall off the end and yield None.
    return companies
def get_companies_and_officers():
    """
    Collect all currently registered companies and their active officers.

    Returns a dict with two entries:
      'allofficers':  {officer name: [company_number, ...]}
      'allcompanies': {company_number: company name}
    Officers that carry an 'end_date' are left out.
    """
    officers_by_name = {}  # {name: ['company 1', 'company 2']}
    companies_by_number = {}
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for record in json_lines.reader(f):
            # Only records explicitly marked as currently registered count.
            if 'current_status' not in record:
                continue
            if record['current_status'] != 'currently registered':
                continue
            companies_by_number[record['company_number']] = record['name']
            if 'officers' not in record:
                continue
            for person in record['officers']:
                if 'end_date' in person:
                    continue
                # First sighting of a name starts a new list, later ones
                # extend it.
                officers_by_name.setdefault(person['name'], []).append(
                    record['company_number'])
    return {'allofficers': officers_by_name,
            'allcompanies': companies_by_number}
def create_companies_network(data):
    """
    Write the company graph to 'local_data/offeneregister-companies.gdf'.

    Nodes are the entries of data['allcompanies'] (company number -> name,
    with commas and newlines in the name replaced by the words COMMA and
    NEWLINE). For every officer in data['allofficers'], each pair of
    distinct company numbers in that officer's list becomes one edge.
    """
    with open('local_data/offeneregister-companies.gdf', 'w', encoding='utf-8') as gdf:
        gdf.write('nodedef>name VARCHAR,label VARCHAR\n')
        for number, label in data['allcompanies'].items():
            safe_label = label.replace(',', 'COMMA').replace('\n', 'NEWLINE')
            gdf.write('{0},{1}\n'.format(number, safe_label))

        gdf.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
        for shared in data['allofficers'].values():
            # Pair every entry with the entries after it so each
            # combination is visited exactly once.
            for position, first in enumerate(shared):
                for second in shared[position + 1:]:
                    if first == second:
                        continue
                    gdf.write('{0},{1}\n'.format(first, second))
    print('Companies network created.')
def create_officers_network():
    """
    Write the officer graph to 'local_data/offeneregister-officers.gdf'.

    For every currently registered company in
    'local_data/de_companies_ocdata.jsonl', the active officers (those
    without an 'end_date') are connected pairwise. Each unordered pair is
    written once per company, the same way create_companies_network emits
    its edges, instead of once in each direction.

    The node section only contains its header; nodes are not listed.
    """
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        with open('local_data/offeneregister-officers.gdf', 'w', encoding='utf-8') as output:
            output.write('nodedef>name VARCHAR,label VARCHAR\n')
            output.write('edgedef>node1 VARCHAR,node2 VARCHAR\n')
            for company in json_lines.reader(f):
                if company.get('current_status') != 'currently registered':
                    continue
                if 'officers' not in company:
                    continue
                # Escape each active officer's name once per company rather
                # than once per pair.
                names = [
                    officer['name'].replace(',', 'COMMA').replace('\n', 'NEWLINE')
                    for officer in company['officers']
                    if 'end_date' not in officer
                ]
                # Pair each name only with the names after it, so (a, b) is
                # not followed later by the redundant (b, a).
                for index, name_a in enumerate(names):
                    for name_b in names[index + 1:]:
                        # Names are compared as written to the file; equal
                        # names would only produce a self-loop.
                        if name_a != name_b:
                            output.write('{0},{1}\n'.format(name_a, name_b))
    print('Officers network created.')
def create_officers_companies_network(registrar='München'):
    """
    Write a combined company/officer graph for one registrar.

    Scans 'local_data/de_companies_ocdata.jsonl' for currently registered
    companies whose ``all_attributes['registrar']`` equals ``registrar``
    and writes 'local_data/offeneregister-combined-<registrar>.gdf' with:
      * one node per matching company (company number, escaped name);
      * one directed edge officer name -> company number for every
        officer of such a company that has no 'end_date'.

    Records that lack 'all_attributes' or a 'registrar' entry are skipped
    instead of aborting the run with a KeyError, in line with how missing
    'current_status' and 'officers' fields are tolerated.
    """
    nodes = []
    edges = []
    with open('local_data/de_companies_ocdata.jsonl', 'rb') as f:
        for company in json_lines.reader(f):
            # `or {}` also covers an explicit null for all_attributes.
            attributes = company.get('all_attributes') or {}
            if 'registrar' not in attributes or attributes['registrar'] != registrar:
                continue
            if company.get('current_status') != 'currently registered':
                continue
            nodes.append('{0},{1}\n'.format(
                company['company_number'],
                company['name'].replace(',', 'COMMA').replace('\n', 'NEWLINE')))
            for officer in company.get('officers') or ():
                if 'end_date' not in officer:
                    edges.append('{0},{1},TRUE\n'.format(
                        officer['name'].replace(',', 'COMMA').replace('\n', 'NEWLINE'),
                        company['company_number']))
    # The output is written only after the whole input has been scanned,
    # because the GDF node section has to precede the edge section.
    with open('local_data/offeneregister-combined-{0}.gdf'.format(registrar), 'w', encoding='utf-8') as output:
        output.write('nodedef>name VARCHAR,label VARCHAR\n')
        output.writelines(nodes)
        output.write('edgedef>node1 VARCHAR,node2 VARCHAR,directed BOOLEAN\n')
        output.writelines(edges)
    print('Combined network created.')
# Script entry point. The guard keeps the (slow, file-writing) pipeline from
# running when this module is merely imported. Uncomment the steps you need.
if __name__ == '__main__':
    #pp.pprint(get_companies(0,2))
    #data = get_companies_and_officers()
    #create_officers_network()
    #create_companies_network(data)
    create_officers_companies_network('Köln')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment