Jorrit Poelen jhpoelen

## calculateKingdomToKingdomInteractions.scala
// Builds a (taxon id, kingdom) table from taxonCache.tsv.bz2 and writes it out
// tab-separated. Relies on a `spark` session being in scope (e.g. spark-shell).
// Chains use trailing dots so the script can also be pasted line by line.
import spark.implicits._

// Tab-separated input whose first row holds the column names.
val taxa = spark.read.
  option("delimiter", """\t""").
  option("header", "true").
  csv("taxonCache.tsv.bz2")

taxa.printSchema

// Same immutable DataFrame as `taxa`; reuse it instead of parsing the file twice.
val taxonCache = taxa

// (id, pathNames, path) triples, dropping rows where any of the three is null.
val taxonIdsPaths = taxonCache.
  select("id", "pathNames", "path").
  as[(String, String, String)].
  filter(r => r._1 != null && r._2 != null && r._3 != null)

val taxaIdToKingdom = taxonIdsPaths.
  map { r =>
    // pathNames and path are '|'-separated lists that are paired up by position.
    val labels = r._2.split("\\|").map(_.trim)
    val values = r._3.split("\\|").map(_.trim)
    // Keep the path value(s) whose label is "kingdom", concatenated into one string.
    val kingdom = labels.zip(values).filter(_._1 == "kingdom").map(_._2).mkString
    (r._1, kingdom)
  }.
  // Drop ids for which no kingdom entry was found.
  filter(_._2.nonEmpty).
  // Keep only ids whose prefix (text before the first ':') is one of these.
  filter(r => List("GBIF", "ITIS", "WORMS", "INAT_TAXON").contains(r._1.split(":").head)).
  filter(_._2 != "incertae sedis")

taxaIdToKingdom.write.option("delimiter", """\t""").csv("taxaIdToKingdom.tsv")

## resp.json
{
  "conceptrecid": "1486278",
  "created": "2018-11-14T00:29:34.856766+00:00",
  "files": [],
  "id": 1486279,
  "links": {
    "bucket": "https://zenodo.org/api/files/35cfca90-d31f-4b36-b91a-8def579ca410",
    "discard": "https://zenodo.org/api/deposit/depositions/1486279/actions/discard",
    "edit": "https://zenodo.org/api/deposit/depositions/1486279/actions/edit",
    "files": "https://zenodo.org/api/deposit/depositions/1486279/files",

## register_hashes_with_hash_archive.sh
#!/bin/bash
# Register all preston urls with hash-archive.org
#
# Please replace the "deeplinker\.bio" instances below with the escaped hostname of your own Preston instance.

# see https://preston.guoda.bio on how to install preston
#

# Pipeline: list preston statements as TSV, keep the "Version" rows, take
# columns 1 and 3 and put each value on its own line, drop the
# .well-known/genid entries, de-duplicate, rewrite hash://sha256 prefixes to
# https://deeplinker.bio, prepend the hash-archive.org enqueue endpoint, and
# request each resulting URL with curl (one curl call per line).
preston ls -l tsv |
  grep Version |
  cut -f1,3 |
  tr '\t' '\n' |
  grep -v "deeplinker\.bio/\.well-known/genid" |
  sort |
  uniq |
  sed -e 's/hash:\/\/sha256/https:\/\/deeplinker.bio/g' -e 's/^/https:\/\/hash-archive.org\/api\/enqueue\//g' |
  xargs -L1 curl

## README
interactions.tsv.gz
    Contains pairwise interactions generated by elton 0.5.0 on 2018-06-29 .
    Generated by dietmatrix.sh .

fbPredPreyOrder.tsv.gz
    Contains prey/diet items of species known to fishbase. For prey/diet items, the linked order(s) are included as well as the resolved prey/diet item terms. Calculated by dietmatrix.sh .

majorityOrders.tsv
    Majority orders were calculated by selecting the most frequently occurring order associated with a specific prey id/name.
    If different order assignments for a particular prey item have the same frequency, the orders are sorted alphabetically and the first is selected.

## exportNanoPubs.sh
#!/bin/bash
#
# Example of how to create trusty nanopubs from species interaction data using elton and nanopub-java
#

echo download elton tool...
# --fail: exit non-zero on an HTTP error instead of saving the error page as elton.jar.
# -L: follow redirects. -o: write the response body to elton.jar.
if ! curl --fail -L -o elton.jar "https://github.com/globalbioticinteractions/elton/releases/download/0.4.1/elton.jar"; then
  echo "download elton tool failed." >&2
  exit 1
fi
echo download elton tool done.

# you can also use https://github.com/globalbioticinteractions/elton-archive to retrieve archived datasets from the internet archive

## dietMatrix.R
# add code

## .gitignore
.idea

## gist:39d866721bb35a63d0e9b99073c5e8b2
#install.packages('rgbif')

# Read Fresh.species.csv (path relative to the working directory) into `fresh`.
fresh <- read.csv(file = "Fresh.species.csv")

#fresh <- data.frame(predator.taxon.name = c('Arius felis', 'Ariopsis felis', 'Gadus morhua'))

# appends columns gbifSpeciesKey, gbifOccCount when gbif knows about species and has occurrences
appendOccCount <- function(df) {
  names <- df$predator.taxon.name

## coastalPredators.R
# Cypher query run through rglobi: starting from the taxon named
# "Homo sapiens", find prey it ate or preyed on where the predator was
# collected at a location whose environment is named "coast"; then, per prey
# taxon, count the distinct taxon names that prey taxon itself ate.
# The query text is assembled one clause per line and joined with newlines.
rglobi::query(paste(
  'START predatorTaxon = node:taxons(name="Homo sapiens")',
  'MATCH predatorTaxon<-[:CLASSIFIED_AS]-predator-[:ATE|PREYS_ON]->prey-[:CLASSIFIED_AS]->preyTaxon,',
  'predator-[:COLLECTED_AT]->location-[:HAS_ENVIRONMENT]->envo',
  'WHERE envo.name= "coast"',
  'WITH envo, preyTaxon, predatorTaxon',
  'MATCH preyTaxon<-[:CLASSIFIED_AS]-pred2-[:ATE]->prey2-[:CLASSIFIED_AS]->preyOfPreyTaxon',
  'RETURN envo.externalId, envo.name, preyTaxon.commonNames?, preyTaxon.path?, predatorTaxon.name as predatorName, preyTaxon.name as preyName, count(distinct preyOfPreyTaxon.name) as preyOfPreyCount',
  sep = "\n"
))

## completeNerd.html
<html>
<head>
    <link rel="stylesheet" href="http://libs.cartocdn.com/cartodb.js/v3/themes/css/cartodb.css"/>
    <script src="http://libs.cartocdn.com/cartodb.js/v3/cartodb.js"></script>
    <script src="https://rawgit.com/jhpoelen/eol-globi-data-js/master/globi-data-dist.js"></script>
    <style>
        html, body {
            width: 100%;
            height: 100%;
            padding: 0;
	// Builds a (taxon id, kingdom) table from taxonCache.tsv.bz2 and writes it out
	// tab-separated. Relies on a `spark` session being in scope (e.g. spark-shell).
	// Chains use trailing dots so the script can also be pasted line by line.
	import spark.implicits._

	// Tab-separated input whose first row holds the column names.
	val taxa = spark.read.
	  option("delimiter", """\t""").
	  option("header", "true").
	  csv("taxonCache.tsv.bz2")

	taxa.printSchema

	// Same immutable DataFrame as `taxa`; reuse it instead of parsing the file twice.
	val taxonCache = taxa

	// (id, pathNames, path) triples, dropping rows where any of the three is null.
	val taxonIdsPaths = taxonCache.
	  select("id", "pathNames", "path").
	  as[(String, String, String)].
	  filter(r => r._1 != null && r._2 != null && r._3 != null)

	val taxaIdToKingdom = taxonIdsPaths.
	  map { r =>
	    // Split on a literal '|' (regex "\\|"); the two lists are paired up by position.
	    val labels = r._2.split("\\|").map(_.trim)
	    val values = r._3.split("\\|").map(_.trim)
	    // Keep the path value(s) whose label is "kingdom", concatenated into one string.
	    val kingdom = labels.zip(values).filter(_._1 == "kingdom").map(_._2).mkString
	    (r._1, kingdom)
	  }.
	  // Drop ids for which no kingdom entry was found.
	  filter(_._2.nonEmpty).
	  // Keep only ids whose prefix (text before the first ':') is one of these.
	  filter(r => List("GBIF", "ITIS", "WORMS", "INAT_TAXON").contains(r._1.split(":").head)).
	  filter(_._2 != "incertae sedis")

	taxaIdToKingdom.write.option("delimiter", """\t""").csv("taxaIdToKingdom.tsv")
	{
	"conceptrecid": "1486278",
	"created": "2018-11-14T00:29:34.856766+00:00",
	"files": [],
	"id": 1486279,
	"links": {
	"bucket": "https://zenodo.org/api/files/35cfca90-d31f-4b36-b91a-8def579ca410",
	"discard": "https://zenodo.org/api/deposit/depositions/1486279/actions/discard",
	"edit": "https://zenodo.org/api/deposit/depositions/1486279/actions/edit",
	"files": "https://zenodo.org/api/deposit/depositions/1486279/files",
	#!/bin/bash
	# Register all preston urls with hash-archive.org
	#
	# Please replace the "deeplinker\.bio" instances below with the escaped hostname of your own Preston instance.

	# see https://preston.guoda.bio on how to install preston
	#

	# Pipeline: list preston statements as TSV, keep the "Version" rows, take
	# columns 1 and 3 and put each value on its own line, drop the
	# .well-known/genid entries, de-duplicate, rewrite hash://sha256 prefixes to
	# https://deeplinker.bio, prepend the hash-archive.org enqueue endpoint, and
	# request each resulting URL with curl (one curl call per line).
	# The stages are joined with real pipes; a backslash-escaped "\|" would be
	# handed to preston as a literal argument instead of connecting the commands.
	preston ls -l tsv |
	  grep Version |
	  cut -f1,3 |
	  tr '\t' '\n' |
	  grep -v "deeplinker\.bio/\.well-known/genid" |
	  sort |
	  uniq |
	  sed -e 's/hash:\/\/sha256/https:\/\/deeplinker.bio/g' |
	  sed -e 's/^/https:\/\/hash-archive.org\/api\/enqueue\//g' |
	  xargs -L1 curl
	interactions.tsv.gz
	Contains pairwise interactions generated by elton 0.5.0 on 2018-06-29 .
	Generated by dietmatrix.sh .

	fbPredPreyOrder.tsv.gz
	Contains prey/diet items of species known to fishbase. For prey/diet items, the linked order(s) are included as well as the resolved prey/diet item terms. Calculated by dietmatrix.sh .

	majorityOrders.tsv
	Majority orders were calculated by selecting the most frequently occurring order associated with a specific prey id/name.
	If different order assignments for a particular prey item have the same frequency, the orders are sorted alphabetically and the first is selected.
	#!/bin/bash
	#
	# Example of how to create trusty nanopubs from species interaction data using elton and nanopub-java
	#

	echo download elton tool...
	# --fail: exit non-zero on an HTTP error instead of saving the error page as elton.jar.
	# -L: follow redirects. -o: write the response body to elton.jar.
	if ! curl --fail -L -o elton.jar "https://github.com/globalbioticinteractions/elton/releases/download/0.4.1/elton.jar"; then
	  echo "download elton tool failed." >&2
	  exit 1
	fi
	echo download elton tool done.

	# you can also use https://github.com/globalbioticinteractions/elton-archive to retrieve archived datasets from the internet archive
	#install.packages('rgbif')

	# Read Fresh.species.csv (path relative to the working directory) into `fresh`.
	fresh <- read.csv(file = "Fresh.species.csv")

	#fresh <- data.frame(predator.taxon.name = c('Arius felis', 'Ariopsis felis', 'Gadus morhua'))

	# appends columns gbifSpeciesKey, gbifOccCount when gbif knows about species and has occurrences
	appendOccCount <- function(df) {
	names <- df$predator.taxon.name
	# Cypher query run through rglobi: starting from the taxon named
	# "Homo sapiens", find prey it ate or preyed on where the predator was
	# collected at a location whose environment is named "coast"; then, per prey
	# taxon, count the distinct taxon names that prey taxon itself ate.
	# The relationship alternation is written as a plain "|": "\|" is not a valid
	# escape inside an R string literal and stops the script from parsing.
	rglobi::query('START predatorTaxon = node:taxons(name="Homo sapiens")
	MATCH predatorTaxon<-[:CLASSIFIED_AS]-predator-[:ATE|PREYS_ON]->prey-[:CLASSIFIED_AS]->preyTaxon,
	predator-[:COLLECTED_AT]->location-[:HAS_ENVIRONMENT]->envo
	WHERE envo.name= "coast"
	WITH envo, preyTaxon, predatorTaxon
	MATCH preyTaxon<-[:CLASSIFIED_AS]-pred2-[:ATE]->prey2-[:CLASSIFIED_AS]->preyOfPreyTaxon
	RETURN envo.externalId, envo.name, preyTaxon.commonNames?, preyTaxon.path?, predatorTaxon.name as predatorName, preyTaxon.name as preyName, count(distinct preyOfPreyTaxon.name) as preyOfPreyCount')
	<html>
	<head>
	<link rel="stylesheet" href="http://libs.cartocdn.com/cartodb.js/v3/themes/css/cartodb.css"/>
	<script src="http://libs.cartocdn.com/cartodb.js/v3/cartodb.js"></script>
	<script src="https://rawgit.com/jhpoelen/eol-globi-data-js/master/globi-data-dist.js"></script>
	<style>
	html, body {
	width: 100%;
	height: 100%;
	padding: 0;