Last active
August 29, 2015 14:18
-
-
Save benefacere/936e8ed387c9f7870b4e to your computer and use it in GitHub Desktop.
BulkMobileGoogleCheck
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7') | |
import groovyx.net.http.AsyncHTTPBuilder | |
import groovyx.net.http.HTTPBuilder | |
// Search terms for the initial Google query - customise as needed
def query = ['agence', 'web', 'nantes']

// Number of top results to study - customise as needed
def nbRes = 10

// Results whose URL starts with one of these prefixes are left out of the analysis - customise as needed
def listExcludeStartsWith = [
    'http://maps.google.fr',
    'https://maps.google.fr',
    'https://fr-fr.facebook.com',
    'http://fr-fr.facebook.com',
    'http://www.indeed.fr',
    'http://www.118000.fr',
    'http://fr.mappy.com',
    'http://www.pagesjaunes.fr',
    'http://www.youtube.com',
    'http://www.randstad.fr',
    'http://www.tripadvisor.fr',
    'http://www.booking.com',
    'http://fr.viadeo.com'
]

// Base URL of the check service; the URL of the page to test is appended to it
def queryCheck = 'https://www.googleapis.com/pagespeedonline/v3beta1/mobileReady?url='

// Technical declarations
// Pool of nbRes threads fed by a queue bounded to nbRes waiting tasks
def pendingTasks = new java.util.concurrent.LinkedBlockingQueue<Runnable>(nbRes)
def thPoolReq = new java.util.concurrent.ThreadPoolExecutor(
    nbRes,
    nbRes,
    Long.MAX_VALUE,
    java.util.concurrent.TimeUnit.NANOSECONDS,
    pendingTasks)
def asyncHttpBuilder = new AsyncHTTPBuilder(threadPool: thPoolReq)
def httpBuilder = new HTTPBuilder("http://www.google.fr")
// Query Google and scrape the first N results into listeRes
def listeRes = []
httpBuilder.request(groovyx.net.http.Method.GET, groovyx.net.http.ContentType.HTML) { req ->
    uri.path = '/search'
    // The builder URL-encodes the values of the query map, so the terms are joined
    // with spaces; a literal '+' would be sent as %2B instead of a separator.
    uri.query = [num: nbRes, q: query.join(' ')]
    headers.'User-Agent' = 'Mozilla/5.0'
    response.success = { resp, html ->
        // Each result title is looked up as an <h3 class="r"> element wrapping the link
        html.depthFirst().findAll {
            it.name().equalsIgnoreCase('h3') && it.@class?.text()?.equals('r')
        }?.each { baliseRes ->
            def lien = baliseRes.A.@href?.text()
            // Only redirect links of the form /url?q=<target>&sa=... are kept
            if (!lien?.startsWith('/url?q=')) {
                return
            }
            def debut = lien.indexOf('http')
            if (debut < 0) {
                // No absolute target URL in the link: nothing to analyse
                return
            }
            // Cut at the '&sa=' marker when present, otherwise keep the rest of
            // the link rather than failing on a negative index
            def fin = lien.indexOf('&sa=', debut)
            def url = fin > debut ? lien.substring(debut, fin) : lien.substring(debut)
            // Skip the URL when it starts with any non-empty excluded prefix
            def isExcluded = listExcludeStartsWith.any { it && url.startsWith(it) }
            if (!isExcluded) {
                listeRes.add(url)
            }
        }
    }
    response.failure = { resp ->
        println resp.statusLine
    }
}
// Submit one asynchronous check per retained URL.
// Each entry records the pending request, whether its result has been consumed,
// the URL itself and its 1-based rank in the search results.
def listeCompCheck = []
def listeCompCheckRes = []
if (!listeRes.isEmpty()) {
    println listeRes.size() + ' url vont etre analysees'
    listeRes.eachWithIndex { cible, rang ->
        def enCours = httpReq(queryCheck + cible, asyncHttpBuilder)
        listeCompCheck << [req: enCours, done: false, line: cible, pos: rang + 1]
    }
}
// Poll the pending checks every 3 seconds until every entry has been consumed.
// Entries whose request could not even be submitted (httpReq returned null and
// already logged the failure) are marked done up front; dereferencing them in
// the loop would throw and abort the whole script.
listeCompCheck.findAll { it.req == null }.each { it.done = true }
int nbdone = listeCompCheck.count { it.done }
while (listeCompCheck.size() != nbdone) {
    Thread.sleep(3000)
    println 'En attente des tests de ' + (listeCompCheck.size() - nbdone) + ' pages'
    listeCompCheck.each { map ->
        def request = map.req
        // Skip entries already consumed or whose request is still running
        if (map.done || !request.done) {
            return
        }
        try {
            // The request yields a [response, parsed JSON] pair; JSON is null on HTTP failure
            def tabres = request.get()
            def resp = tabres[0]
            def json = tabres[1]
            if (json) {
                def check = json.ruleGroups?.USABILITY?.pass
                if (check) {
                    listeCompCheckRes.add([pos:map.pos, res:('La page ' + map.line + ' est compatible avec un score de ' + json.ruleGroups?.USABILITY?.score)])
                }
                else {
                    listeCompCheckRes.add([pos:map.pos, res:('La page ' + map.line + ' est incompatible')])
                }
            }
            else {
                println 'Erreur url ' + map.line + ', reponse : ' + resp.statusLine
            }
        }
        catch (Exception e) {
            // Report the cause instead of discarding it
            println 'Erreur url reponse : ' + map.line + ' (' + e.message + ')'
        }
        // Count the entry exactly once, whether it succeeded or failed
        nbdone++
        map.done = true
    }
}
// Print the verdicts ordered by search rank
listeCompCheckRes.sort { it.pos }
listeCompCheckRes.each { println 'Rank ' + it.pos + ' - ' + it.res }

// Release the worker threads and HTTP connections once the report is printed;
// the executor is created without a thread factory, so its worker threads are
// non-daemon and would otherwise keep the JVM alive after the script finishes.
thPoolReq.shutdown()
asyncHttpBuilder.shutdown()
httpBuilder.shutdown()
/**
 * Submits a GET request for the given URL through the supplied asynchronous
 * builder, asking for a JSON response.
 *
 * The success handler yields the pair [response, parsed body]; the failure
 * handler yields [response, null].
 *
 * @param line              full URL to request
 * @param asyncHttpBuilder  builder used to submit the request
 * @return whatever the builder's request call returns, or null when submitting
 *         the request throws (the failure is printed)
 */
private httpReq(line, asyncHttpBuilder) {
    try {
        return asyncHttpBuilder.request(line, groovyx.net.http.Method.GET, groovyx.net.http.ContentType.JSON) { httpGet ->
            headers['User-Agent'] = 'Mozilla/5.0'
            def httpParams = httpGet.getParams()
            httpParams.setParameter("http.connection.timeout", 5000)
            // Allow 60 seconds for the test itself to run
            httpParams.setParameter("http.socket.timeout", 60000)
            response.success = { resp, reader -> [resp, reader] }
            response.failure = { resp -> [resp, null] }
        }
    }
    catch (Exception ignored) {
        println 'Erreur url requete : ' + line
    }
    return null
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment