Skip to content

Instantly share code, notes, and snippets.

@benefacere
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benefacere/936e8ed387c9f7870b4e to your computer and use it in GitHub Desktop.
BulkMobileGoogleCheck
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7')
import groovyx.net.http.AsyncHTTPBuilder
import groovyx.net.http.HTTPBuilder
// Initial Google search terms - customize as needed
def query = ['agence','web','nantes']
// Analyze the first N search results - customize as needed
def nbRes = 10
// Result URLs starting with any of these prefixes are skipped - customize as needed
def listExcludeStartsWith = ['http://maps.google.fr', 'https://maps.google.fr', 'https://fr-fr.facebook.com',
'http://fr-fr.facebook.com', 'http://www.indeed.fr', 'http://www.118000.fr', 'http://fr.mappy.com',
'http://www.pagesjaunes.fr', 'http://www.youtube.com', 'http://www.randstad.fr','http://www.tripadvisor.fr',
'http://www.booking.com','http://fr.viadeo.com']
// Google PageSpeed (v3beta1) mobile-friendliness endpoint; the target URL is appended to it
def queryCheck = 'https://www.googleapis.com/pagespeedonline/v3beta1/mobileReady?url='
// Technical declarations: fixed pool of nbRes threads with a bounded queue of
// nbRes pending tasks, backing the async builder used for the parallel checks
def thPoolReq = new java.util.concurrent.ThreadPoolExecutor(nbRes, nbRes,Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS,
new java.util.concurrent.LinkedBlockingQueue<Runnable>(nbRes))
def asyncHttpBuilder = new AsyncHTTPBuilder(threadPool:thPoolReq)
def httpBuilder = new HTTPBuilder("http://www.google.fr")
// Google query and scrape of the first N results (collected into listeRes)
def listeRes = []
// Query Google and scrape the first N organic results.
// Side effect: fills listeRes with the result URLs that pass the exclusion list.
// NOTE(review): HTTPBuilder URL-encodes uri.query values, so join('+') reaches
// Google as %2B rather than a space separator — confirm this is intended
// (joining with ' ' would encode as a normal multi-term query).
httpBuilder.request(groovyx.net.http.Method.GET, groovyx.net.http.ContentType.HTML) { req ->
    uri.path = '/search'
    uri.query= [num: nbRes, q: query.join('+')]
    headers.'User-Agent' = 'Mozilla/5.0'
    response.success = { resp, html ->
        // Organic results are <h3 class="r"> nodes; the first <a> child carries the link
        html.depthFirst().findAll { it.name().equalsIgnoreCase('h3') && it.@class?.text()?.equals('r') }?.each { baliseRes ->
            def lien = baliseRes.A.@href?.text()
            // Google wraps targets as /url?q=<target>&sa=...; cut out the real URL
            if (lien.startsWith('/url?q=')) {
                def url = lien?.substring(lien.indexOf('http'),lien.indexOf('&sa='))
                // Excluded when the URL starts with any non-empty blacklist prefix
                // (equivalent to the original double-negated every{}, but direct;
                // the `prefix &&` guard also covers null/empty entries)
                def isExcluded = listExcludeStartsWith.any { prefix ->
                    prefix && url?.startsWith(prefix)
                }
                if (!isExcluded) {
                    listeRes.add(url)
                }
            }
        }
    }
    response.failure = { resp ->
        println resp.statusLine
    }
}
// Analysis of the validated URLs: submit one async mobile-friendliness check
// per scraped URL, then poll until every pending check has been consumed.
def listeCompCheck = []
def listeCompCheckRes = []
if (listeRes.size() > 0) {
println listeRes.size() + ' url vont etre analysees'
int pos = 1
// One entry per URL: req = Future from the async builder, done = consumed flag,
// line = the URL under test, pos = its 1-based Google rank
listeRes.each {line ->
listeCompCheck.add( [req:httpReq(queryCheck + line, asyncHttpBuilder),done:false,line:line,pos:pos++] )
}
}
// Busy-wait: every 3s, harvest the futures that completed since the last pass.
// nbdone counts consumed entries; the loop exits once all entries are consumed.
// NOTE(review): if httpReq returned null (submit failed), map.req.done below
// would throw an NPE — verify that submission failures cannot reach this loop.
int nbdone = 0
while (listeCompCheck.size() != nbdone) {
Thread.sleep(3000)
println 'En attente des tests de ' + (listeCompCheck.size() - nbdone) + ' pages'
listeCompCheck.each {map ->
def request = map.req
// Only harvest futures that finished and were not already consumed
if (request.done && !map.done) {
try {
// Future resolves to [response, parsedJson]; parsedJson is null on HTTP failure
def tabres = request.get()
def resp = tabres[0]
def json = tabres[1]
if (json) {
// ruleGroups.USABILITY comes from the PageSpeed JSON payload
def check = json.ruleGroups?.USABILITY?.pass
if (check) {
listeCompCheckRes.add([pos:map.pos, res:('La page ' + map.line + ' est compatible avec un score de ' + json.ruleGroups?.USABILITY?.score)])
}
else {
listeCompCheckRes.add([pos:map.pos, res:('La page ' + map.line + ' est incompatible')])
}
}
else {
println 'Erreur url ' + map.line + ', reponse : ' + resp.statusLine
}
}
catch (Exception e) {
println 'Erreur url reponse : ' + map.line
}
// Mark consumed even on error so the loop can terminate
nbdone++
map.done = true
}
}
}
// Report the outcomes ordered by original Google rank
listeCompCheckRes.sort { it.pos }
listeCompCheckRes.each { println 'Rank ' + it.pos + ' - ' + it.res }
/**
 * Submits an asynchronous GET for the given URL and returns the Future
 * produced by AsyncHTTPBuilder, or null when submitting the request throws
 * (the error is logged, not propagated). The future resolves to a
 * [response, parsedBody] pair; parsedBody is null on an HTTP failure status.
 */
private httpReq(line, asyncHttpBuilder) {
    def future = null
    try {
        future = asyncHttpBuilder.request(line, groovyx.net.http.Method.GET, groovyx.net.http.ContentType.JSON) { req ->
            headers.'User-Agent' = 'Mozilla/5.0'
            // The connection must be established within 5 seconds...
            req.getParams().setParameter("http.connection.timeout", 5000)
            // ...and the mobile-friendliness test itself gets up to 60 seconds
            req.getParams().setParameter("http.socket.timeout", 60000)
            // Closures yield their last expression, so both handlers
            // resolve the future to a [response, body-or-null] pair
            response.success = { resp, reader -> [resp, reader] }
            response.failure = { resp -> [resp, null] }
        }
    }
    catch (Exception e) {
        println 'Erreur url requete : ' + line
    }
    return future
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment