Skip to content

Instantly share code, notes, and snippets.

@sheymann
Created January 6, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sheymann/676261d4265959e8d4b2 to your computer and use it in GitHub Desktop.
Save sheymann/676261d4265959e8d4b2 to your computer and use it in GitHub Desktop.
Create a Neo4j database from Crunchbase data in Excel
//-----------------------
//Import the cities
//-----------------------
CREATE CONSTRAINT ON (e:CITY) ASSERT e.name IS UNIQUE;
//Create the companies' cities
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line
FIELDTERMINATOR ';'
MERGE (:CITY { name: line.city });
//Create the investors' cities
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (:CITY { name: line.investor_city });
//-----------------------
//Import the markets
//-----------------------
CREATE CONSTRAINT ON (e:MARKET) ASSERT e.name IS UNIQUE;
//Create the companies' markets
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line
FIELDTERMINATOR ';'
MERGE (:MARKET { name: line.market });
//Create the investors' markets
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (:MARKET { name: line.investor_market });
//-----------------------
//Import the companies
//-----------------------
CREATE CONSTRAINT ON (a:COMPANY) ASSERT a.permalink IS UNIQUE;
//Create the companies
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line
FIELDTERMINATOR ';'
CREATE (a:COMPANY {
name: line.name,
permalink: line.permalink,
homepage_url: line.homepage_url,
funding_total: toInt(line.funding_total_usd),
funding_rounds: toInt(line.funding_rounds),
founded_at: line.founded_at,
founded_month: line.founded_month,
founded_quarter: line.founded_quarter,
founded_year: toInt(line.founded_year),
first_funding_at: line.first_funding_at,
last_funding_at: line.last_funding_at,
category: line.category_list,
market: line.market,
status: line.status,
country: line.country_code,
state: line.state_code,
region: line.region,
city: line.city,
url: 'http://www.crunchbase.com' + line.permalink,
logo: 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw'
});
//Link companies to cities
MATCH (a:COMPANY)
MATCH (b:CITY { name: a.city })
CREATE (a)-[:HAS_CITY]->(b);
//Link companies to markets
MATCH (a:COMPANY)
MATCH (b:MARKET { name: a.market })
CREATE (a)-[:HAS_MARKET]->(b);
//-----------------------
//Import the investments
//-----------------------
CREATE CONSTRAINT ON (a:INVESTOR) ASSERT a.permalink IS UNIQUE;
//Create the investors
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:COMPANY { permalink: line.investor_permalink })
SET a:INVESTOR;
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (a:INVESTOR { permalink: line.investor_permalink })
ON CREATE SET a.name = line.investor_name,
a.category = line.investor_category_list,
a.market = line.investor_market,
a.country = line.investor_country_code,
a.state = line.investor_state_code,
a.region = line.investor_region,
a.city = line.investor_city,
a.logo = 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw',
a.url = 'http://www.crunchbase.com' + line.investor_permalink;
//Link investors to cities
MATCH (a:INVESTOR)
MATCH (b:CITY { name: a.city})
MERGE (a)-[:HAS_CITY]->(b);
//Link investors to companies
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:INVESTOR { permalink: line.investor_permalink })
MATCH (b:COMPANY { permalink: line.company_permalink })
CREATE (a)-[:INVESTED_IN {
funded_at: line.funded_at,
funded_month: line.funded_month,
funded_quarter: line.funded_quarter,
funded_year: toInt(line.funded_year),
raised_amount_usd: toInt(line.raised_amount_usd),
funding_round_code: line.funding_round_code,
funding_round_type: line.funding_round_type,
permalink: line.funding_round_permalink,
url: 'http://www.crunchbase.com' + line.permalink
}]->(b);
//-----------------------
//Import the acquisitions
//-----------------------
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Acquisitions.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:COMPANY { permalink: line.acquirer_permalink })
MATCH (b:COMPANY { permalink: line.company_permalink })
CREATE (a)-[:ACQUIRED {
acquisition_date: line.acquired_at,
month: line.acquired_month,
quarter: line.acquired_quarter,
year: line.acquired_year,
price_amount: toInt(line.price_amount),
currency: line.price_currency_code
}]->(b);
//-----------------------
//Cleaning up
//-----------------------
MATCH (n:COMPANY)
WHERE n.state = ''
REMOVE n.state;
MATCH (n:COMPANY)
WHERE n.homepage_url = ''
REMOVE n.homepage_url;
MATCH (n:COMPANY)
WHERE n.funding_total = ''
REMOVE n.funding_total;
MATCH (n:COMPANY)
WHERE n.funding_rounds = ''
REMOVE n.funding_rounds;
MATCH (n:COMPANY)
WHERE n.founded_at = ''
REMOVE n.founded_at;
MATCH (n:COMPANY)
WHERE n.founded_month = ''
REMOVE n.founded_month;
MATCH (n:COMPANY)
WHERE n.founded_quarter = ''
REMOVE n.founded_quarter;
MATCH (n:COMPANY)
WHERE n.founded_year = ''
REMOVE n.founded_year;
MATCH (n:COMPANY)
WHERE n.first_funding_at = ''
REMOVE n.first_funding_at;
MATCH (n:COMPANY)
WHERE n.last_funding_at = ''
REMOVE n.last_funding_at;
MATCH (n:COMPANY)
WHERE n.category = ''
REMOVE n.category;
MATCH (n:COMPANY)
WHERE n.market = ''
REMOVE n.market;
MATCH (n:COMPANY)
WHERE n.status = ''
REMOVE n.status;
MATCH (n:COMPANY)
WHERE n.country = ''
REMOVE n.country;
MATCH (n:COMPANY)
WHERE n.state = ''
REMOVE n.state;
MATCH (n:COMPANY)
WHERE n.region = ''
REMOVE n.region;
MATCH (n:COMPANY)
REMOVE n.city;
MATCH (n:INVESTOR)
WHERE n.state = ''
REMOVE n.state;
MATCH (n:INVESTOR)
WHERE n.category = ''
REMOVE n.category;
MATCH (n:INVESTOR)
WHERE n.market = ''
REMOVE n.market;
MATCH (n:INVESTOR)
WHERE n.city = ''
REMOVE n.city;
MATCH (n:INVESTOR)
WHERE n.region = ''
REMOVE n.region;
MATCH (n:INVESTOR)
WHERE n.country = ''
REMOVE n.country;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funded_quarter = ''
REMOVE r.funded_quarter;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funded_year = ''
REMOVE r.funded_year;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.raised_amount_usd = ''
REMOVE r.raised_amount_usd;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funding_round_code = ''
REMOVE r.funding_round_code;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funding_round_type = ''
REMOVE r.funding_round_type;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funded_month = ''
REMOVE r.funded_month;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE r.funded_at = ''
REMOVE r.funded_at;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.acquisition_date = ''
REMOVE r.acquisition_date;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.month = ''
REMOVE r.month;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.quarter = ''
REMOVE r.quarter;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.year = ''
REMOVE r.year;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.price_amount = ''
REMOVE r.price_amount;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE r.currency = ''
REMOVE r.currency;
MATCH (n:CITY {name :''})-[r]-()
DELETE n, r;
MATCH (n:MARKET {name :''})-[r]-()
DELETE n, r;
@sheymann
Copy link
Author

sheymann commented Jan 6, 2015

Export French companies:

export-graphml -t -o crunchbase-fr.graphml match (n)-[r]-(a:COMPANY {country:'FRA'}) return n,r,a

Import French companies into Neo4j:

import-graphml -t -i crunchbase-fr.graphml -b 20000

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment