rvanbruggen/1-scm-background.txt

## 1-scm-background.txt
// Found article on https://or.stackexchange.com/questions/529/supply-chain-public-data-repository
// https://pubsonline.informs.org/doi/suppl/10.1287/msom.1070.0176
// https://pubsonline.informs.org/doi/suppl/10.1287/msom.1070.0176/suppl_file/msom.1070.0176-sm-datainexcel.zip

//Article by Sean P. Willems: https://pdfs.semanticscholar.org/232c/451fcf58dbcc1527de6d02cd6e76aea9e871.pdf?_ga=2.33151675.429569592.1581427039-1552162479.1581427039
Table 2 Classifications Used to Label Every Stage in the Chains
Classifications label Activity
  Dist_ A stage that distributes an item
  Manuf_ A stage that manufactures or assembles an item
  Part_ A stage that procures an item
  Retail_ A stage that acts as a demand orgination point
  Trans_ A stage that transports an item between stages
//appendix: http://willems.utk.edu/papers/dl/Willems_MSOM_v10n1_Winter2008_Appendix.pdf


//full spreadsheet: https://drive.google.com/file/d/1jfheHEKIZK1cnkC8vs-M2vQIcFCtEJ1o/view?usp=sharing
//google sheet: https://docs.google.com/spreadsheets/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/edit#gid=1371172744

## 2-scm-import.cql
//importing the dataset
create index on :SupplyChain(id);
create index on :Stage(name);
create index on :Stage(betweenness);
create index on :SIC(code);
create index on :Company(id);
create index on :StageClassification(name);

//load the companies
load csv with headers from "https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=0" as csv
merge (sic:SIC {code: toInt(csv.SIC_Code), desc: csv.SIC_Description})
merge (c:Company {id: toInt(csv.companyId)})
merge (ch:SupplyChain {id: toInt(csv.chainId)})
merge (c)-[:HAS_SIC]->(sic)
create (c)-[:HAS_CHAIN]->(ch);

//load the rels between the chain stages
load csv with headers from
"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=1371172744" as csv
merge (ss:Stage {name: csv.sourceStage, chainId: toInt(csv.chainId)})
merge (ds:Stage {name: csv.destinationStage, chainId: toInt(csv.chainId)})
create (ss)-[:PRECEDES]->(ds);

//create the STARTS_WITH and ENDS rels between the SupplyChain and the first and last chain stage
//create STARTS_WITH
load csv with headers from
"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=1371172744" as csv
match (sc:SupplyChain {id: toInt(csv.chainId)}), (ss:Stage {name: csv.sourceStage, chainId: toInt(csv.chainId)})
where not (()-[:PRECEDES]->(ss))
AND sc.id = ss.chainId
merge (sc)-[:STARTS_WITH]->(ss);
//create ENDS
match path = (sc:SupplyChain)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
where not ((endstage)-[:PRECEDES]->())
merge (sc)<-[:ENDS]-(endstage);


//load the detail of each Stage
load csv with headers from
"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=579862779" as csv
match (s:Stage {chainId: toInt(csv.chainId), name: csv.Stage_Name})
set s.stageCost = toFloat(csv.stageCost)
set s.relDepth = toInteger(csv.relDepth)
set s.stageClassification = csv.stageClassification
set s.avgDemand = csv.avgDemand
set s.stdDevDemand = csv.stdDevDemand
set s.maxServiceTime	= csv.maxServiceTime
set s.serviceLevel = csv.serviceLevel
set s.stageTime = toFloat(csv.stageTime)
set s.stdDev_stageTime = csv.stdDev_stageTime;


//split out the stageClassifications
match (s:Stage)
merge (sc:StageClassification {name: s.stageClassification})
create (s)-[:IS_A]->(sc);

//merge the two "Parts" stageClassifications
match (sc1:StageClassification {name:"Parts"}), (sc2:StageClassification {name:"Part"})
WITH head(collect([sc1,sc2])) as nodes
CALL apoc.refactor.mergeNodes(nodes,{properties:"combine", mergeRels:true}) yield node
set node.name = "Parts"
return count(node);


//model in text
match (n)-[r]->(m)
return distinct labels(n), type(r), labels(m);
//model in visualization
call db.schema.visualization;

## 3-scm-queries.cql
// data profiling and validation
match (n) return distinct labels(n), count(*);
match (n)-[r]->() return distinct type(r), count(*);
match p= (sc:SupplyChain)<-[:HAS_CHAIN]-(c:Company)-[HAS_SIC]->(s:SIC)
return p limit 25;
match (sc:SupplyChain {id:1})-[*..3]-(conn) return sc, conn limit 10;

// Some SCM queries
//depth of SC Network by traversing graph
match path = ((sc:SupplyChain)-[:STARTS_WITH|PRECEDES*]->(s:Stage))
where not ((s)-[:PRECEDES]->())
return  distinct sc.id, (max(length(path))-1) as maxlength
order by maxlength desc;

//depth of SC network by reading stage properties
match (sc:SupplyChain)-->(s:Stage)
return distinct sc.id, max(s.relDepth) as maxlengthprop
order by maxlengthprop desc;

//how many start and end nodes
match (s1:Stage)<-[start:STARTS_WITH]-(sc:SupplyChain)
with sc, count(start) as starts
match (sc)<-[end:ENDS]-(s2)
return sc.id, starts, count(end);

//verify the number of endstages for a SupplyChain by counting the paths
match path = (:SupplyChain {id:12})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
where not ((endstage)-[:PRECEDES]->())
with distinct endstage
return count(endstage);

//Zooming in on chain nr 20 - different path lengths
match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
where not ((endstage)-[:PRECEDES]->())
return distinct count(path), length(path);

//Visualizing the different path lengths for chain nr 20
match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
where not ((endstage)-[:PRECEDES]->())
return path;

//Visualizing the paths with length = 8
//profile: Cypher version: CYPHER 3.5, planner: COST, runtime: SLOTTED. 4200 total db hits in 39 ms.

match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
where not ((endstage)-[:PRECEDES]->()) and length(path) = 8
return path;

//Visualizing the paths with length = 8 with ENDS check
//profile: Cypher version: CYPHER 3.5, planner: COST, runtime: SLOTTED. 2138 total db hits in 11 ms.
match path = (sc:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]-(sc)
where length(path) = 9
return path;

## 4-scm-analytics.cql
//look at cost of paths
match (sc:SupplyChain {id:8})-[:STARTS_WITH]->(startstage:Stage)
with startstage,sc
match path = (startstage)-[:PRECEDES*]->(endstage:Stage)
where (endstage)-[:ENDS]->(sc)
with path, reduce(sum=0, x in nodes(path) | sum+x.stageTime) as totalTime,
reduce(sum=0, y in nodes(path) | sum+y.stageCost) as totalCost
return totalTime, totalCost, [x in nodes(path) | id(x)]
order by totalTime asc
limit 10;


//Calculate the betweenness scores
:param label => ("Stage");
:param relationshipType => ("PRECEDES");
:param limit => (100);
:param config => ({concurrency: 8, direction: "Outgoing", writeProperty: "betweenness"});

CALL algo.betweenness($label, $relationshipType, $config);

MATCH (node:Stage)
WHERE not(node[$config.writeProperty] is null)
RETURN node, node[$config.writeProperty] AS score
ORDER BY score DESC
LIMIT $limit;


//take a look at the nodes with the highest betweenness
MATCH (s:Stage)
WITH s
ORDER BY s.betweenness DESC
LIMIT 1
SET s:BETWEENNESSCHAMP
WITH s
MATCH path = ((sc:SupplyChain)-[:STARTS_WITH|PRECEDES|ENDS*]->(s))
where s in nodes(path)
with sc,s
limit 1
match path = (sc)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]->(sc)
return path, s
limit 100;

//take a look at the nodes with the highest betweenness in a particular supply chain
MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH|PRECEDES*]->(s))
where s in nodes(path)
with sc,s
match (s)
with s,sc
order by s.betweenness DESC
limit 1
set s:BETWEENNESSCHAMP
with s, sc
match path = (sc)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]->(sc)
return path, s
limit 100

//take a look at the degree of the nodes with the highest betweenness
MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH|PRECEDES|ENDS*]->(s))
where s in nodes(path)
with sc,s
match (s)
return distinct s.name, apoc.node.degree(s) as degree, s.betweenness as betweenness
order by betweenness DESC
limit 10

//take a look at the betweenness of the nodes ordered by degree
MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH|PRECEDES|ENDS*]->(s))
where s in nodes(path)
with sc,s
match (s)
return distinct s.name, apoc.node.degree(s) as degree, s.betweenness as betweenness
order by degree DESC
limit 10
	// Found article on https://or.stackexchange.com/questions/529/supply-chain-public-data-repository
	// https://pubsonline.informs.org/doi/suppl/10.1287/msom.1070.0176
	// https://pubsonline.informs.org/doi/suppl/10.1287/msom.1070.0176/suppl_file/msom.1070.0176-sm-datainexcel.zip

	//Article by Sean P. Willems: https://pdfs.semanticscholar.org/232c/451fcf58dbcc1527de6d02cd6e76aea9e871.pdf?_ga=2.33151675.429569592.1581427039-1552162479.1581427039
	Table 2 Classifications Used to Label Every Stage in the Chains
	Classifications label Activity
	Dist_ A stage that distributes an item
	Manuf_ A stage that manufactures or assembles an item
	Part_ A stage that procures an item
	Retail_ A stage that acts as a demand orgination point
	Trans_ A stage that transports an item between stages
	//appendix: http://willems.utk.edu/papers/dl/Willems_MSOM_v10n1_Winter2008_Appendix.pdf



	//full spreadsheet: https://drive.google.com/file/d/1jfheHEKIZK1cnkC8vs-M2vQIcFCtEJ1o/view?usp=sharing
	//google sheet: https://docs.google.com/spreadsheets/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/edit#gid=1371172744
	//importing the dataset
	create index on :SupplyChain(id);
	create index on :Stage(name);
	create index on :Stage(betweenness);
	create index on :SIC(code);
	create index on :Company(id);
	create index on :StageClassification(name);

	//load the companies
	load csv with headers from "https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=0" as csv
	merge (sic:SIC {code: toInt(csv.SIC_Code), desc: csv.SIC_Description})
	merge (c:Company {id: toInt(csv.companyId)})
	merge (ch:SupplyChain {id: toInt(csv.chainId)})
	merge (c)-[:HAS_SIC]->(sic)
	create (c)-[:HAS_CHAIN]->(ch);

	//load the rels between the chain stages
	load csv with headers from
	"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=1371172744" as csv
	merge (ss:Stage {name: csv.sourceStage, chainId: toInt(csv.chainId)})
	merge (ds:Stage {name: csv.destinationStage, chainId: toInt(csv.chainId)})
	create (ss)-[:PRECEDES]->(ds);

	//create the STARTS_WITH and ENDS rels between the SupplyChain and the first and last chain stage
	//create STARTS_WITH
	load csv with headers from
	"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=1371172744" as csv
	match (sc:SupplyChain {id: toInt(csv.chainId)}), (ss:Stage {name: csv.sourceStage, chainId: toInt(csv.chainId)})
	where not (()-[:PRECEDES]->(ss))
	AND sc.id = ss.chainId
	merge (sc)-[:STARTS_WITH]->(ss);
	//create ENDS
	match path = (sc:SupplyChain)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
	where not ((endstage)-[:PRECEDES]->())
	merge (sc)<-[:ENDS]-(endstage);


	//load the detail of each Stage
	load csv with headers from
	"https://docs.google.com/spreadsheets/u/0/d/1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc/export?format=csv&id=1vD4UCxsincEQYQpAeCbEqnd3QFdKcuL4ukesbyu5RIc&gid=579862779" as csv
	match (s:Stage {chainId: toInt(csv.chainId), name: csv.Stage_Name})
	set s.stageCost = toFloat(csv.stageCost)
	set s.relDepth = toInteger(csv.relDepth)
	set s.stageClassification = csv.stageClassification
	set s.avgDemand = csv.avgDemand
	set s.stdDevDemand = csv.stdDevDemand
	set s.maxServiceTime = csv.maxServiceTime
	set s.serviceLevel = csv.serviceLevel
	set s.stageTime = toFloat(csv.stageTime)
	set s.stdDev_stageTime = csv.stdDev_stageTime;



	//split out the stageClassifications
	match (s:Stage)
	merge (sc:StageClassification {name: s.stageClassification})
	create (s)-[:IS_A]->(sc);

	//merge the two "Parts" stageClassifications
	match (sc1:StageClassification {name:"Parts"}), (sc2:StageClassification {name:"Part"})
	WITH head(collect([sc1,sc2])) as nodes
	CALL apoc.refactor.mergeNodes(nodes,{properties:"combine", mergeRels:true}) yield node
	set node.name = "Parts"
	return count(node);


	//model in text
	match (n)-[r]->(m)
	return distinct labels(n), type(r), labels(m);
	//model in visualization
	call db.schema.visualization;
	// data profiling and validation
	match (n) return distinct labels(n), count(*);
	match (n)-[r]->() return distinct type(r), count(*);
	match p= (sc:SupplyChain)<-[:HAS_CHAIN]-(c:Company)-[HAS_SIC]->(s:SIC)
	return p limit 25;
	match (sc:SupplyChain {id:1})-[*..3]-(conn) return sc, conn limit 10;

	// Some SCM queries
	//depth of SC Network by traversing graph
	match path = ((sc:SupplyChain)-[:STARTS_WITH\|PRECEDES*]->(s:Stage))
	where not ((s)-[:PRECEDES]->())
	return distinct sc.id, (max(length(path))-1) as maxlength
	order by maxlength desc;

	//depth of SC network by reading stage properties
	match (sc:SupplyChain)-->(s:Stage)
	return distinct sc.id, max(s.relDepth) as maxlengthprop
	order by maxlengthprop desc;

	//how many start and end nodes
	match (s1:Stage)<-[start:STARTS_WITH]-(sc:SupplyChain)
	with sc, count(start) as starts
	match (sc)<-[end:ENDS]-(s2)
	return sc.id, starts, count(end);

	//verify the number of endstages for a SupplyChain by counting the paths
	match path = (:SupplyChain {id:12})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
	where not ((endstage)-[:PRECEDES]->())
	with distinct endstage
	return count(endstage);

	//Zooming in on chain nr 20 - different path lengths
	match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
	where not ((endstage)-[:PRECEDES]->())
	return distinct count(path), length(path);

	//Visualizing the different path lengths for chain nr 20
	match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
	where not ((endstage)-[:PRECEDES]->())
	return path;

	//Visualizing the paths with length = 8
	//profile: Cypher version: CYPHER 3.5, planner: COST, runtime: SLOTTED. 4200 total db hits in 39 ms.

	match path = (:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)
	where not ((endstage)-[:PRECEDES]->()) and length(path) = 8
	return path;

	//Visualizing the paths with length = 8 with ENDS check
	//profile: Cypher version: CYPHER 3.5, planner: COST, runtime: SLOTTED. 2138 total db hits in 11 ms.
	match path = (sc:SupplyChain {id:20})-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]-(sc)
	where length(path) = 9
	return path;
	//look at cost of paths
	match (sc:SupplyChain {id:8})-[:STARTS_WITH]->(startstage:Stage)
	with startstage,sc
	match path = (startstage)-[:PRECEDES*]->(endstage:Stage)
	where (endstage)-[:ENDS]->(sc)
	with path, reduce(sum=0, x in nodes(path) \| sum+x.stageTime) as totalTime,
	reduce(sum=0, y in nodes(path) \| sum+y.stageCost) as totalCost
	return totalTime, totalCost, [x in nodes(path) \| id(x)]
	order by totalTime asc
	limit 10;


	//Calculate the betweenness scores
	:param label => ("Stage");
	:param relationshipType => ("PRECEDES");
	:param limit => (100);
	:param config => ({concurrency: 8, direction: "Outgoing", writeProperty: "betweenness"});

	CALL algo.betweenness($label, $relationshipType, $config);

	MATCH (node:Stage)
	WHERE not(node[$config.writeProperty] is null)
	RETURN node, node[$config.writeProperty] AS score
	ORDER BY score DESC
	LIMIT $limit;


	//take a look at the nodes with the highest betweenness
	MATCH (s:Stage)
	WITH s
	ORDER BY s.betweenness DESC
	LIMIT 1
	SET s:BETWEENNESSCHAMP
	WITH s
	MATCH path = ((sc:SupplyChain)-[:STARTS_WITH\|PRECEDES\|ENDS*]->(s))
	where s in nodes(path)
	with sc,s
	limit 1
	match path = (sc)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]->(sc)
	return path, s
	limit 100;

	//take a look at the nodes with the highest betweenness in a particular supply chain
	MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH\|PRECEDES*]->(s))
	where s in nodes(path)
	with sc,s
	match (s)
	with s,sc
	order by s.betweenness DESC
	limit 1
	set s:BETWEENNESSCHAMP
	with s, sc
	match path = (sc)-[:STARTS_WITH]->(startstage:Stage)-[:PRECEDES*]->(endstage:Stage)-[:ENDS]->(sc)
	return path, s
	limit 100

	//take a look at the degree of the nodes with the highest betweenness
	MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH\|PRECEDES\|ENDS*]->(s))
	where s in nodes(path)
	with sc,s
	match (s)
	return distinct s.name, apoc.node.degree(s) as degree, s.betweenness as betweenness
	order by betweenness DESC
	limit 10

	//take a look at the betweenness of the nodes ordered by degree
	MATCH path = ((sc:SupplyChain {id:19})-[:STARTS_WITH\|PRECEDES\|ENDS*]->(s))
	where s in nodes(path)
	with sc,s
	match (s)
	return distinct s.name, apoc.node.degree(s) as degree, s.betweenness as betweenness
	order by degree DESC
	limit 10