Skip to content

Instantly share code, notes, and snippets.

@RC1140
Created January 14, 2011 16:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RC1140/779799 to your computer and use it in GitHub Desktop.
Save RC1140/779799 to your computer and use it in GitHub Desktop.
# Scrapes the AWX comics catalogue one page at a time and saves every comic
# found (Name, Price, Available) to awxComics.xml with Export-Clixml.
#
# Paging stops when a page contains the "no products" message. It also stops,
# with a warning, when a page cannot be downloaded or does not have the
# expected layout, so a network or markup problem cannot make the loop spin
# forever; whatever was collected up to that point is still exported.
$comics = @()
$noItemsFound = 'No products currently available in here.'
$count = 1

# One client is reused for every request and released in the finally block.
$client = New-Object System.Net.WebClient
try {
    while ($true) {
        try {
            $page = $client.DownloadString("http://www.awx.co.za/e107_plugins/shop/show_cat.php?lettersort=A&catpath=64/Comics&page=$count")
        } catch {
            Write-Warning "Failed to download page ${count}: $($_.Exception.Message)"
            break
        }
        if ([string]::IsNullOrEmpty($page)) {
            Write-Warning "Page $count came back empty; stopping."
            break
        }

        # The site reports the end of the catalogue with this message.
        if ($page.IndexOf($noItemsFound) -gt -1) {
            break
        }
        write-host "Scraping Page $count"

        # The product table is cut out starting at the SECOND occurrence of
        # "main_section" (the search restarts 20 characters after the first
        # hit) and ending at the next "</table>".
        # NOTE(review): presumably the first occurrence is a CSS/header
        # reference rather than the listing itself - confirm against the site.
        $firstMarker = $page.IndexOf("main_section")
        $startIndex = -1
        if ($firstMarker -ge 0 -and ($firstMarker + 20) -le $page.Length) {
            $startIndex = $page.IndexOf("main_section", $firstMarker + 20)
        }
        $endIndex = -1
        if ($startIndex -ge 0) {
            $endIndex = $page.IndexOf("</table>", $startIndex)
        }
        if ($startIndex -lt 0 -or $endIndex -lt 0) {
            Write-Warning "Page $count does not have the expected product table; stopping."
            break
        }
        $table = $page.Substring($startIndex, $endIndex - $startIndex)

        $rows = $table -split "<tr>"
        foreach ($row in $rows) {
            $columnDetails = $row -split "<td "

            # Pieces [2] and [3] are read below, so shorter rows are skipped.
            if ($columnDetails.Length -lt 4) {
                continue
            }
            $nameCell = $columnDetails[2]
            $priceCell = $columnDetails[3]

            # The item's name is the value of the title attribute in piece [2].
            # Rows without one (for example heading rows) carry no product.
            $titlePos = $nameCell.IndexOf("title=")
            if ($titlePos -lt 0) {
                continue
            }
            $titleIndex = $titlePos + "title=".Length
            $titleEndIndex = $nameCell.IndexOf(">", $titleIndex)
            if ($titleEndIndex -lt 0) {
                continue
            }
            $comicName = $nameCell.Substring($titleIndex, $titleEndIndex - $titleIndex).Replace("`"", "")
            # Kept from the original script: names beginning with '=' are not products.
            if ($comicName.StartsWith('=')) {
                continue
            }

            # The cell mentions "green" when the item is in stock.
            if ($nameCell.IndexOf("green") -gt -1) {
                $comicAvailable = "In Stock"
            } else {
                $comicAvailable = "out Of Stock"
            }

            # The item's price is the text after the first '>' in piece [3].
            $comicPrice = $priceCell.Substring($priceCell.IndexOf(">") + 1).Replace("</td>", "")

            $comic = New-Object System.Object
            $comic | Add-Member -type NoteProperty -name Name -value $comicName
            $comic | Add-Member -type NoteProperty -name Price -value $comicPrice
            $comic | Add-Member -type NoteProperty -name Available -value $comicAvailable
            $comics += $comic
        }
        $count++
    }
} finally {
    $client.Dispose()
}
Export-Clixml -Path 'awxComics.xml' -InputObject $comics
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment