Skip to content

Instantly share code, notes, and snippets.

@crazy4groovy
Last active May 18, 2021 21:34
Show Gist options
  • Save crazy4groovy/10643855 to your computer and use it in GitHub Desktop.
Save crazy4groovy/10643855 to your computer and use it in GitHub Desktop.
Get photos from one or more DeviantArt galleries; download them to a local folder or export the URL list to a local file
/*
* author: crazy4groovy
* description: given a list of 1+ deviant art gallery URLs (semicolon separated),
* will parse out a list of all full size images found.
* license: MIT, no warranties or guarantees!
*/
import static groovyx.gpars.GParsPool.withPool as parallel
/////////////////////////////////
// ---- Settings -------------------------------------------------------------
// Positional command-line arguments, all optional:
//   args[0]  semicolon-separated list of gallery URLs
//   args[1]  output target: an existing directory (images are downloaded into it)
//            or a file path (the image URL list is appended to it)
//   args[2]  per-gallery image count at which paging stops
//   args[3]  number of scraper threads (clamped to the range 1..4)
//   args[4]  text placed between the group name and the image file name
//            (defaults to the path separator, i.e. one sub-folder per group)
List urls = args.size() > 0 ?
    args[0].split(';')*.trim() :
    ['http://name_of_account.deviantart.com/gallery/?set=gallery_id_number']
String fileOutPath = args.size() > 1 ? args[1] : /C:\out.txt/
int maxCount = args.size() > 2 ? args[2].toInteger() : 2000
// Upper bound of 4 as before; the lower bound keeps a 0 or negative argument
// from being handed to the thread pool.
int threads = args.size() > 3 ? Math.max(1, Math.min(args[3].toInteger(), 4)) : 2
String groupSeparator = args.size() > 4 ? args[4] : File.separator
/////////////////////////////////
println "$fileOutPath $maxCount $threads $groupSeparator"

// Step added to the 'offset=' query parameter for each successive gallery page.
int offsetInterval = 24

// group name -> ordered set of image URLs found for that group.
// Shared by all scraper threads, hence the synchronized wrapper.
Map imgMap = Collections.synchronizedMap([:].withDefault{[] as LinkedHashSet})

// query URL -> number of retries attempted so far. withDefault inserts an entry on
// every miss, so even a lookup mutates the map; it is shared across scraper threads
// and therefore needs the same synchronized wrapper as imgMap.
Map retriesMap = Collections.synchronizedMap([:].withDefault{0})

// Maximum number of retries for a single page query before it is given up on.
int timeoutTries = 5
// Scrape all gallery URLs in parallel (at most `threads` at a time). Each worker pages
// through one gallery via a YQL query and adds the 'data-super-img' attribute values it
// finds to imgMap under the gallery's group name.
parallel(threads) {
    urls.eachParallel { url ->
        println "START *** ${url} *****>>"

        String picGroup = getPicGroup(url)
        if (!picGroup) {
            println "Invalid URL, please check it"
            return
        }

        // The gallery URL is embedded inside the YQL query, so it has to be encoded.
        // The trailing '?' or '&' lets the paging loop simply append 'offset=N'.
        // The charset is given explicitly: the single-argument encode() is deprecated
        // because its result depends on the platform default encoding.
        url = java.net.URLEncoder.encode(url + (!url.contains('?') ? '?' : '&'), 'UTF-8')

        int prevSize = 0
        int count = 0   // number of distinct images collected for this group so far
        int page = 0
        while (count < maxCount) {
            String u = url + 'offset=' + (page*offsetInterval)
            String yql = """https://query.yahooapis.com/v1/public/yql?diagnostics=true&q=select%20*%20from%20html%20where%20xpath%3D%27%2F%2Fa%5B%40data-super-img%5D%27and%20url%3D'${u}'"""
            println "{{{ $yql }}}"

            // Linear back-off: 100 ms base delay plus one second per retry already made.
            Thread.sleep(1000 * (retriesMap[yql] ?: 0) + 100)

            def root
            try {
                root = new XmlSlurper().parse(yql)
            }
            catch (java.io.IOException ex) {
                println ("caught IOException! $picGroup -- $ex")
                break
            }
            catch (Exception ex) {
                println ("caught Exception! $picGroup -- $ex")
                break
            }

            // A non-empty 'error' attribute in the diagnostics means the page fetch failed.
            String err = root.diagnostics.url?.@error
            if (err != "") {
                if (retriesMap[yql] < timeoutTries) {
                    println "! timed out :: $err -- retrying $picGroup pg#$page"
                    retriesMap[yql] = retriesMap[yql] + 1
                }
                else {
                    println "!! timed out :: $err -- SKIPPING $picGroup pg#$page"
                    // Retries exhausted: only a read timeout moves on to the next page,
                    // any other error abandons this gallery.
                    if (err != "Read timed out") break
                    page++
                }
                continue
            }

            println "found: ${root.results.a.size()}"
            List res = root.results.a.collect{it.@'data-super-img'.text()}
            if (!res) break

            // The per-group sets are not thread-safe and iterating the views of a
            // synchronized map requires holding its lock, so both the update and the
            // grand-total computation happen under the map's monitor.
            int grandTotal
            synchronized (imgMap) {
                Set groupImages = imgMap[picGroup]
                groupImages.addAll(res)
                // Count distinct images: counting res.size() instead would grow on every
                // non-empty page and the "no new images" check below could never fire.
                count = groupImages.size()
                grandTotal = imgMap.values().flatten().size()
            }

            // The page contributed nothing new -- stop instead of re-reading the same images.
            if (prevSize == count) break
            prevSize = count

            println "?? $picGroup pg#$page (${(page*offsetInterval)}) :: found: ${res.size()}, total size: $count >> set grand total size: ${grandTotal}"
            page++
        }
        println "END : ${url}; #$count"
    }
}
// Output stage: if fileOutPath is an existing directory, download every collected image
// into it (grouped by gallery); otherwise append the plain list of image URLs to that file.
println "*SET GRAND TOTAL img list: (${imgMap.values().flatten().size()})*"
File o = new File(fileOutPath)
if (o.isDirectory()) {
    def ant = new AntBuilder()
    imgMap.each { groupName, imgSet ->
        // Replace characters that are problematic in file names, but only in the part
        // generated here. Sanitizing the whole path would also rewrite the directory
        // the user passed in and point the downloads at a location that does not exist.
        String groupPart = (groupName + groupSeparator).replaceAll(/[\*\?"<>|=]/,'_')
        String rootPath = o.absolutePath + File.separator + groupPart

        // With the default separator every group gets its own sub-folder; with a custom
        // separator the group name is just a prefix of the image file name.
        File groupDir = new File(rootPath)
        if (groupSeparator == File.separator && !groupDir.directory) {
            groupDir.mkdirs()
        }

        parallel(4) {
            imgSet.eachParallel { imgUrl ->
                // The file name is the last path segment of the image URL.
                String imgName = imgUrl.split('/')[-1]
                try {
                    ant.get(src: imgUrl, dest: rootPath + imgName, skipexisting: 'true')
                }
                catch (Exception ex) {
                    // Best effort: report the failure and carry on with the other images.
                    println "!! failed to download $imgUrl -- $ex"
                }
            }
        }
    }
}
else {
    print "Directory $fileOutPath does not exist, writing to file..."
    // Appending an empty string creates the file if it is missing;
    // existing content is kept and the URL list is appended after it.
    o << ''
    o << imgMap.values().flatten().join('\n') + '\n'
    println "DONE"
}
/**
 * Derives a group name for a deviantart gallery URL.
 *
 * The name is built from the account sub-domain, the path after
 * "deviantart.com" and, if the query string contains a "q=" parameter, that
 * parameter's value. Slashes are turned into underscores, e.g.
 * "http://name.deviantart.com/gallery/?q=cats" becomes "name_gallery_cats".
 *
 * @param url gallery URL, with or without a leading "http://" or "https://"
 * @return the group name, or null when the URL is empty or is not of the form
 *         "&lt;account&gt;.deviantart.com&lt;path&gt;"
 */
String getPicGroup( String url ) {
    if (!url) return null

    // Drop the scheme only where it is a prefix, and accept https as well as http;
    // a leftover "https:" would otherwise end up inside the group name.
    String bare = url.replaceFirst('(?i)^https?://', '')

    // 1: account, 2: path (plus query when it has no q=), 4: value of the q= parameter.
    // Dots are escaped so that only a literal ".deviantart.com" host is accepted.
    def m = (bare =~ /^(.+)\.deviantart\.com(.+?)(\?.*?q=(\w+))?$/)
    if (!m.find()) return null

    String name = m.group(1) + m.group(2) + (m.group(4) ?: '')
    return name.replaceAll('/',' ').trim().replaceAll(' ','_')
}
@Tsene
Copy link

Tsene commented Aug 9, 2014

Good work here. The script works excellently.

@crazy4groovy
Copy link
Author

Thanks! Just added feature to download the images as well.

@delgadillouski
Copy link

Forget that stupid question. I should learn to notice the code language before asking any question.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment