Skip to content

Instantly share code, notes, and snippets.

@Leward
Created July 16, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Leward/c93f6e3265a23119934f to your computer and use it in GitHub Desktop.
Save Leward/c93f6e3265a23119934f to your computer and use it in GitHub Desktop.
Vos scripts sous stéroïdes avec Groovy
// Load Jsoup, used to fetch HTML pages and query them
@Grab("org.jsoup:jsoup:1.8.2")
import org.jsoup.Jsoup
// Load Neo4j JDBC to load data and query a Neo4j database
@GrabResolver(name='neo4j-public', root='http://m2.neo4j.org/content/groups/public')
@Grab("org.neo4j:neo4j-jdbc:2.1.4")
import org.neo4j.jdbc.Driver
import groovy.sql.Sql
// This script regularly gets the articles on the home page of the slate.com website
// The titles are then persisted into a database
// Fetch the Slate home page and collect the text of every <h1> nested in an <article>
println "Reading articles on Slate"
def homePage = Jsoup.connect("http://slate.com/").get()
// One entry per matching heading, in the order the selector returns them
def titles = homePage.select("article h1").collect { heading -> heading.text() }
println "Found ${titles.size()} articles"
// Persist every scraped title into Neo4j: one Article node per title, linked to
// the Website node and to the Day node of this run.
def sql = Sql.newInstance('jdbc:neo4j://localhost:7474/')

// Cypher statement, built once and reused for every title.
// {1}, {2} and {3} are bound from the positional parameter list given to execute().
// NOTE(review): the backticks around labels/relationship types presumably keep
// groovy.sql.Sql from mistaking ":Label" for a named parameter — confirm before removing them.
def cypherQuery = '''
MERGE (w:`Website` {name: {1}})
MERGE (d:`Day` {date: {2}})
CREATE
(a:`Article` {3}),
(a)-[:`PUBLISHED_ON`]->(w),
(a)-[:`PUBLISHED_AT`]->(d)
'''

// Lowercase 'yyyy' is the calendar year. The previous 'YYYY' is the week-based year,
// which yields the wrong year for some days around January 1st.
// Computed once so that all articles of a single run are attached to the same Day.
def publicationDay = new Date().format("yyyy/MM/dd")

try {
    titles.each { title ->
        sql.execute(cypherQuery, [
            "slate.com",
            publicationDay,
            [title: title]
        ])
        println "Inserted: ${title}"
    }
} finally {
    // Always release the JDBC connection, even when an insert fails
    sql.close()
}
println "Script completed. "
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment