Last active
December 14, 2021 01:01
-
-
Save imoutsatsos/5914c1ebb7eb434a6fc9739a1d64bc19 to your computer and use it in GitHub Desktop.
Finds embedded URLs in a web page and remaps them to https scheme given a few limited logic rules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*** BEGIN META {
"name" : "findAndRemapURL",
"comment" : "Finds embedded URLs in a web page and remaps them to https scheme given a few limited logic rules",
"parameters" : [ 'vWebPageUrl','viiifCurrentPort','viiifRemapPort'],
"core": "2.222.1",
"authors" : [
{ name : "Ioannis K. Moutsatsos" }
]
} END META**/
// Note: includes a special case that also remaps iiif http end points to a different https port
import java.util.regex.Matcher
import java.util.regex.Pattern
import hudson.model.*

/*
 * Fetches the page at vWebPageUrl, finds every embedded URL and rewrites it:
 *   - scheme  : http (any letter case) -> https; other schemes are kept
 *   - host    : 'localhost' -> host of the configured Jenkins root URL
 *   - port    : 8080 -> no explicit port; viiifCurrentPort -> viiifRemapPort
 *   - path, query and fragment are copied verbatim (raw, still percent-encoded)
 *
 * Script parameters (binding variables):
 *   vWebPageUrl      - URL of the page whose text is fetched and rewritten
 *   viiifCurrentPort - port currently used by the embedded iiif http end points
 *   viiifRemapPort   - port those end points are moved to
 *
 * Returns the page text with every matched URL replaced by its remapped form.
 * Progress is printed for each matched URL.
 */

// getRootUrl() may be null when the Jenkins URL is not configured; in that
// case 'localhost' references are simply left as they are.
String jenkinsRootUrl = jenkins.model.Jenkins.instance.getRootUrl()
jenkinsUri = jenkinsRootUrl ? new URI(jenkinsRootUrl) : null
jenkinsLocalHost = jenkinsUri?.getHost()

// Reference pattern (see the Stack Overflow link at the end of the script) with ',' removed
// from the URL body character class.
Pattern urlPattern = Pattern.compile("\\b(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:.;]*[-a-zA-Z0-9+&@#/%=~_|]", Pattern.CASE_INSENSITIVE)

// Converts a port parameter to an Integer; null or blank means "not set".
def toPort = { value ->
    String text = value?.toString()?.trim()
    return text ? (text as int) : null
}

artifactURL = vWebPageUrl
// Parsed once here instead of being re-coerced for every matched URL.
iiifCurrentPort = toPort(viiifCurrentPort)
iiifRemapPort = toPort(viiifRemapPort)

//example Jenkins URLs pointing to artifacts and reports with embedded http://myJenkinsServer.net:8080 style Jenkins references
//"https://myJenkinsServer.net/job/IMAGELIST_GALLERIES/93/artifact/reports/Column-10_AND_RowNumber-4_AND_FieldIndex-4_AND_DNA-PROBE_Contribute_ImageList_748_20211012121922.html"
//"https://myJenkinsServer.net/job/JOB_CONFIG_ANALYZER/875/artifact/buildReportData/paramProps.csv"
//"https://myJenkinsServer.net/job/JOB_CONFIG_ANALYZER/875/console"
//"https://myJenkinsServer.net.net/job/UTIL_CSVDATA_REVIEW/15/artifact/dataTableSkipRows.html"
String urlText = new URL(artifactURL).getText()

// The text is rewritten in a single pass over the matches. Replacing each
// original URL with String.replace afterwards would corrupt URLs that share a
// textual prefix with a shorter match (e.g. http://host:80 vs http://host:8080/x).
Matcher matcher = urlPattern.matcher(urlText)
StringBuffer remappedText = new StringBuffer()
while (matcher.find()) {
    String address = matcher.group()
    String remapped = address
    try {
        URI uri = new URI(address)
        String scheme = 'http'.equalsIgnoreCase(uri.getScheme()) ? 'https' : uri.getScheme()
        // iiif identifiers are percent-encoded path segments, see https://iiif.io/api/image/3.0/#3-identifier
        boolean iiifEndPoint = uri.getPath().startsWith('/iiif/2/')
        println((iiifEndPoint ? 'Got iiif EndPoint,port:' : 'Got URL,port:') + uri.getPort() + '\r\t' + address)

        if (uri.getHost() == null) {
            // No parsable host (e.g. file:///..., or a host name containing '_'):
            // only the scheme can be remapped without losing the authority.
            remapped = scheme + address.substring(uri.getScheme().length())
        } else {
            String host = (uri.getHost() == 'localhost' && jenkinsLocalHost) ? jenkinsLocalHost : uri.getHost()

            int port = uri.getPort()
            if (port == 8080) {
                // The default Jenkins http port is dropped; this rule wins over the iiif remap.
                port = -1
            } else if (iiifCurrentPort != null && iiifRemapPort != null && port == iiifCurrentPort) {
                port = iiifRemapPort
            }

            StringBuilder rebuilt = new StringBuilder(scheme).append('://').append(host)
            if (port != -1) {
                // Only emit an explicit port when there is one (never ':-1').
                rebuilt.append(':').append(port)
            }
            // Raw components keep percent-encoding (such as %2F) exactly as it appeared.
            rebuilt.append(uri.getRawPath() ?: '')
            if (uri.getRawQuery() != null) {
                rebuilt.append('?').append(uri.getRawQuery())
            }
            if (uri.getRawFragment() != null) {
                rebuilt.append('#').append(uri.getRawFragment())
            }
            remapped = rebuilt.toString()
        }

        println(iiifEndPoint ? 'New iiif EndPoint: ' : 'New URL: ')
        println '\t' + remapped
        println '-' * 10
    } catch (URISyntaxException e) {
        // The pattern accepts characters that are not legal in a URI (e.g. '|');
        // such matches are left untouched instead of aborting the whole remap.
        println 'Skipping unparsable URL: ' + address + ' (' + e.getMessage() + ')'
    }
    // quoteReplacement stops '$' and '\' in the URL being read as group references.
    matcher.appendReplacement(remappedText, Matcher.quoteReplacement(remapped))
}
matcher.appendTail(remappedText)
return remappedText.toString()
// References:
// https://gist.githubusercontent.com/sagrawal31/55451ee85130f2dcda8e/raw/974800a6a52c2aea0d0521ece031d10a6950a1c3/DownloadURLs.groovy
// 1. http://stackoverflow.com/questions/5713558/detect-and-extract-url-from-a-string
// 2. http://stackoverflow.com/questions/4674995/groovy-download-image-from-url
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment