Skip to content

Instantly share code, notes, and snippets.

@bhavjot
Last active May 25, 2017 07:08
Show Gist options
  • Save bhavjot/94d9123c6514b9217fabbb689e7b2f5d to your computer and use it in GitHub Desktop.
Save bhavjot/94d9123c6514b9217fabbb689e7b2f5d to your computer and use it in GitHub Desktop.
############ These can be changed ########################
# Page where the crawl starts. It is concatenated directly onto $rawdomain,
# so the presence or absence of a "/" matters.
$homepage   = 'qds.html'
# Domain prefix. Links that do not already contain this text get it prepended,
# which keeps the crawl on this domain (links to anything other than
# $rawdomain end up as broken URLs).
$rawdomain  = ''
# Files that receive the collected URLs and the collected secret texts.
$outputfile = 'E:\scripts\listofURLs.txt'
$secretfile = 'E:\scripts\secret1.txt'
# Crawl depth: upper bound for the main loop, i.e. how many times the URL
# table is walked while new entries keep being added to it.
$loopnumber = 50
#write-host $loopnumber
############ End # These can be changed # End ############

# Hashtables that hold the crawl data
$UrlHash        = @{}   # every known link; value is the link itself or "processed"
$TempURLHash    = @{}   # links still waiting to be crawled in the current pass
$TempURLHash1   = @{}   # copy of the URL table prepared for the output file
$TempURLHash2   = @{}   # scratch table used by CallFromTheHash
$SecretHash     = @{}   # texts collected by FindSecret
$TempSecretHash = @{}   # copy of the secret table prepared for the output file
#Does the raw crawl of the $url passed into it.
#  $url    - address handed to Invoke-WebRequest
#  returns - the href of every link on the page; an empty array when the page
#            has no usable links or the request fails
function global:FindURL($url){
    try {
        #-ErrorAction Stop so that every failure ends up in the catch block
        $page = Invoke-WebRequest -Uri $url -ErrorAction Stop
    }
    catch {
        #Report the URL that could not be fetched and carry on with the crawl
        Write-Warning "FindURL: could not fetch '$url': $($_.Exception.Message)"
        return @()
    }
    #Drop null/empty hrefs: callers use every returned value as a hashtable key
    return @($page.Links.href | Where-Object { $_ })
}
#Does the raw crawl of the $url passed into it and returns text.
#  $url    - address handed to Invoke-WebRequest
#  returns - the innerText of the <font> element(s) on the page (a single value
#            for one element, an array for several); "" when the page has no
#            <font> element or the request fails
function global:FindSecret($url){
    try {
        #-ErrorAction Stop so that every failure ends up in the catch block
        $page = Invoke-WebRequest -Uri $url -ErrorAction Stop
    }
    catch {
        Write-Warning "FindSecret: could not fetch '$url': $($_.Exception.Message)"
        return ""
    }

    $fontElements = @($page.AllElements | Where-Object { $_.TagName -eq "font" })

    #Test the element count: comparing an array to $null with -eq filters the
    #array instead of answering "is this empty?"
    if ($fontElements.Count -eq 0) { return "" }

    $secretText = $fontElements.innerText
    write-host $secretText -foregroundcolor cyan
    return $secretText
}
#Add everything after 1st level to the hashtable.
#Crawls $HashURL once: every link not yet known is added to $UrlHash (value =
#the link itself, which marks it as not yet processed) and every non-empty
#text returned by FindSecret is recorded in $SecretHash.
#  $HashURL - full URL of the page to crawl
#Reads and updates the $UrlHash and $SecretHash tables of the calling script.
function CallFromTheHash ($HashURL) {
    #Fresh scratch table for this page. Index assignment is used instead of
    #"+= @{...}" because adding a key twice with += throws, and pages routinely
    #repeat links.
    $TempURLHash2 = @{}

    #The "$_ -and" guard comes first so ContainsKey never sees a null key
    FindUrl -url $HashURL |
        Where-Object { $_ -and -not $UrlHash.ContainsKey($_) } |
        ForEach-Object { $TempURLHash2[$_] = $_ }

    #FindSecret may return one text or several; store each one separately
    foreach ($text in @(FindSecret -url $HashURL)) {
        if ([string]::IsNullOrEmpty($text)) { continue }
        $SecretHash[$text] = $text
        write-host $text -foregroundcolor green
        write-host $secrethash.count -foregroundcolor white
    }

    #Merge the newly discovered links into the master table
    foreach ($temphalfurl in $TempURLHash2.Keys) {
        if (-not $UrlHash.ContainsKey($temphalfurl)) {
            $UrlHash[$temphalfurl] = $temphalfurl
        }
    }
}
#Add homepage to hashtable
$UrlHash[$homepage] = $homepage
$UrlHash.count
$fullhomepage = $rawdomain + $homepage

#Call function with the homepage and add every link not yet known to the
#hashtable. The "$_ -and" guard skips null/empty links, which cannot be keys.
FindUrl -url $fullhomepage |
    Where-Object { $_ -and -not $UrlHash.ContainsKey($_) } |
    ForEach-Object { $UrlHash[$_] = $_ }

#Record the homepage's secret text(s); FindSecret may return one or several
foreach ($te in @(FindSecret -url $fullhomepage)) {
    if ([string]::IsNullOrEmpty($te)) { continue }
    $SecretHash[$te] = $te
    write-host $te -foregroundcolor green
    write-host $secrethash.count -foregroundcolor white
}
$UrlHash[$homepage] = "processed"

#Loop through hashtable contents. Each pass crawls every link that is still
#pending (value equal to its own key) and marks it "processed"; the loop stops
#early as soon as nothing is pending.
for ($i = 2; $i -le $loopnumber; $i++)
{
    #Copy the pending links into their own table so that $urlhash can be
    #extended by CallFromTheHash while we iterate
    $tempURLHash = @{}
    foreach ($halfurl in @($urlhash.keys)) {
        if ($urlhash[$halfurl] -eq $halfurl) {
            $tempURLHash[$halfurl] = $halfurl
        }
    }
    if ($tempURLHash.count -eq 0) { break }

    foreach ($newhalfurl in $tempURLHash.keys) {
        #Add domain to URL - keeps the crawl on $rawdomain. A link that already
        #contains $rawdomain is used as it is; the URL is assigned in both
        #branches so it can never be left over from the previous link.
        if ($newhalfurl.Contains($rawdomain)) {
            $fullhashurl = $newhalfurl
        }
        else {
            $fullhashurl = $rawdomain + $newhalfurl
        }
        CallFromTheHash $fullhashurl
        $urlhash[$newhalfurl] = "processed"
    }

    #Hit the loop again - see $loopnumber for the loop count - this will dive
    #another layer into the site, go as deep as you like.
    Write-Host "Looping AGAIIIIINNNN" -ForegroundColor red -backgroundcolor DarkYellow
}

#Get contents of $urlhash and put it into $TempURLHash1 ready for logging
$TempURLHash1 = @{}
foreach ($urlfrag in $urlhash.keys) {
    $TempURLHash1[$urlfrag] = "processed"
    $TempURLHash1.count
}

#Get contents of $secrethash and put it into $tempsecrethash ready for logging
$secrethash.keys | ForEach-Object { Write-Host $_ -foregroundcolor cyan }
$tempsecrethash = @{}
foreach ($urlfrag in $secrethash.keys) {
    $tempsecrethash[$urlfrag] = $urlfrag
    $tempsecrethash.count
}

#Output of URL results
$TempURLHash1 | out-file -width 900 $outputfile
#Output of Secret results
$tempsecrethash.keys | out-file -width 1200 $secretfile
@bhavjot
Copy link
Author

bhavjot commented May 25, 2017

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment