-
-
Save orchee/1532f05a5ccc23a942af49f6ad0b15d6 to your computer and use it in GitHub Desktop.
Roksa penetrator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Get-RoksaPage {
    <#
    .SYNOPSIS
    Requests a page and returns the response object produced by Invoke-WebRequest.

    .PARAMETER Url
    Address of the page to request.

    .OUTPUTS
    Whatever Invoke-WebRequest returns for the request. The other functions in
    this file read the AllElements collection of that object.
    #>
    Param(
        [string]$Url
    )
    # NOTE(review): AllElements is presumably only populated by the DOM-based
    # parser of Windows PowerShell - confirm the targeted PowerShell version.
    return Invoke-WebRequest -Uri $Url
}
function Get-RoksaStates {
    <#
    .SYNOPSIS
    Lists the options of the element whose Id is "anons_state".

    .PARAMETER Page
    Page object exposing an AllElements collection (as returned by Get-RoksaPage).

    .OUTPUTS
    One object per <OPTION> with the properties ID (zero-based position),
    DisplayName (option text) and Value (option value attribute), in that order.
    Nothing is returned when no option is found.
    #>
    param(
        [Parameter(Mandatory=$true)]
        $Page
    )
    $select = $Page.AllElements | Where-Object { $_.Id -eq "anons_state" }

    # Splitting on a pattern with two capture groups yields a repeating
    # sequence: surrounding text, value, label, surrounding text, value, label...
    $parts = @($select.innerHTML -split "<OPTION value=(.*?)>(.*?)</OPTION>")

    $states = @(
        # $idx always points at a captured value; the label follows it.
        for ($idx = 1; ($idx + 1) -lt $parts.Count; $idx += 3) {
            # [pscustomobject] keeps the property order stable, unlike a plain hashtable.
            [pscustomobject]@{
                ID          = [int](($idx - 1) / 3)
                DisplayName = $parts[$idx + 1]
                Value       = $parts[$idx]
            }
        }
    )
    return $states
}
function Get-RoksaAnnoucementTypes {
    <#
    .SYNOPSIS
    Lists the options of the element whose Id is "anons_type".

    .DESCRIPTION
    The list always starts with a fixed entry (ID 0, DisplayName "kobieta",
    Value 0); options parsed from the page follow with IDs starting at 1.
    NOTE(review): the fixed entry presumably stands in for an option that the
    pattern below does not match - confirm against the live markup.

    .PARAMETER Page
    Page object exposing an AllElements collection (as returned by Get-RoksaPage).

    .OUTPUTS
    Objects with the properties ID, DisplayName and Value, in that order.
    #>
    param(
        [Parameter(Mandatory=$true)]
        $Page
    )
    $select = $Page.AllElements | Where-Object { $_.Id -eq "anons_type" }

    # Splitting on a pattern with two capture groups yields a repeating
    # sequence: surrounding text, value, label, surrounding text, value, label...
    $parts = @($select.innerHTML -split "<OPTION value=(.*?)>(.*?)</OPTION>")

    $types = @(
        [pscustomobject]@{ ID = 0; DisplayName = "kobieta"; Value = 0 }
        # $idx always points at a captured value; the label follows it.
        for ($idx = 1; ($idx + 1) -lt $parts.Count; $idx += 3) {
            [pscustomobject]@{
                ID          = [int](($idx + 2) / 3)
                DisplayName = $parts[$idx + 1]
                Value       = $parts[$idx]
            }
        }
    )
    return $types
}
function Get-RoksaSearchResultsPage {
    <#
    .SYNOPSIS
    Walks every result page of a search and collects the links to announcements.

    .PARAMETER Type
    Value sent as the anons_type query parameter.

    .PARAMETER State
    Value sent as the anons_state query parameter.

    .PARAMETER Url
    Base address of the search page; the query string is appended to it.

    .OUTPUTS
    Status strings (written with Write-Output) followed by one object per link
    found, each with a single href property. The status strings are part of the
    output on purpose: existing callers index into this output and tell links
    apart by checking href, so the stream layout is left unchanged.
    #>
    Param(
        [Parameter(Mandatory=$true)]
        [string]$Type,
        [Parameter(Mandatory=$true)]
        [string]$State,
        [Parameter(Mandatory=$true)]
        [string]$Url
    )
    $Page = Get-RoksaPage -Url ("{0}?anons_type={1}&anons_state={2}" -f $Url, $Type, $State)

    # Keep only the first number found in the H2 headings. Leaving the whole
    # match array here made the summary line below print "System.Object[]".
    $headingText = ($Page.AllElements | Where-Object {$_.TagName -match "H2"}).outerText
    $total = @(($headingText -split "(\d+)") -match "(\d+)") | Select-Object -First 1

    Write-Output ("Znaleziono {0} ogłoszeń." -f $total)
    Write-Progress -Activity "Lista ogłoszeń" -Status "Tworzenie pełnej listy ogłoszeń" -PercentComplete 0 -SecondsRemaining 100
    $timeStamp = [DateTime]::Now

    # The second number in the pager text is used as the page count.
    # NOTE(review): a missing pager (or one with a single number) is assumed to
    # mean a single result page - confirm against the live markup.
    $totalPages = 1
    $pager = $Page.AllElements | Where-Object {$_.class -match "stronnicowanie"} | Select-Object -First 1
    if ($pager) {
        $pagerNumbers = @(($pager.InnerText -split "(\d+)") -match "(\d+)")
        if ($pagerNumbers.Count -gt 1) {
            $totalPages = [int]$pagerNumbers[1]
        }
    }

    $allPages = @()
    # Pages are requested from the last one down to the first.
    for ($i = $totalPages; $i -gt 0; $i--) {
        $done = $totalPages - $i
        $elapsed = ([DateTime]::Now - $timeStamp).TotalSeconds
        Write-Progress -Activity "Lista ogłoszeń" -Status ("Tworzenie pełnej listy ogłoszeń {0}/{1}" -f ($done + 1), $totalPages) -PercentComplete ($done / $totalPages * 100) -SecondsRemaining (($elapsed / ($done + 1)) * $i)
        $Page = Get-RoksaPage -Url ("{0}?anons_type={1}&anons_state={2}&pageNr={3}" -f $Url, $Type, $State, $i)
        Write-Output ("Szukam na stronie: {0}?anons_type={1}&anons_state={2}&pageNr={3}" -f $Url, $Type, $State, $i)
        # @() keeps a page without links from appending a $null entry.
        $allPages += @($Page.AllElements | Where-Object {$_.href -match "www.roksa.pl/pl/anonse/pokaz/"} | Select-Object href)
    }
    Write-Progress -Activity "Lista ogłoszeń" -Status ("Tworzenie pełnej listy ogłoszeń {0}/{0}" -f ($totalPages)) -PercentComplete 100 -SecondsRemaining 0
    Write-Output ("Znaleziono {0} linki do ogłoszeń z {1}" -f $allPages.Count, $total)
    return $allPages
}
function Get-RoksaDetailValue {
    <#
    .SYNOPSIS
    Helper: returns the value that follows a label inside the detail fragments.

    .PARAMETER Details
    HTML fragments to search.

    .PARAMETER Label
    Regular expression matching the label markup.

    .PARAMETER ValuePattern
    Regular expression whose single capture group holds the value.

    .OUTPUTS
    The captured value, or $null when the label or value is not present.
    #>
    param(
        [string[]]$Details,
        [string]$Label,
        [string]$ValuePattern
    )
    # Keep the fragments containing the label, cut them at the label, then cut
    # at the value pattern; the captured value lands at index 2.
    $pieces = @(((@($Details) -match $Label) -split $Label) -split $ValuePattern)
    if ($pieces.Count -gt 2) {
        return $pieces[2]
    }
    return $null
}

function Get-RoksaAnnouncementInfo {
    <#
    .SYNOPSIS
    Downloads one announcement page and extracts its detail fields.

    .PARAMETER Url
    Address of the announcement page. Its last path segment becomes the Id.

    .PARAMETER Type
    Text stored in the Typ property of the result.

    .PARAMETER State
    Text stored in the Wojewodztwo property of the result.

    .OUTPUTS
    One object with the properties Id, Typ, Wojewodztwo, Miasto, Dzielnica,
    Wyjazdy, Wiek, Waga, Wzrost, Biust, Jezyki, Godzina, Polgodziny, Kwadrans,
    Noc and Uslugi, in that order. Fields missing from the page are $null.
    #>
    param(
        [Parameter(Mandatory=$true)]
        [string]$Url,
        [Parameter(Mandatory=$true)]
        [string]$Type,
        [Parameter(Mandatory=$true)]
        [string]$State
    )
    $page = Get-RoksaPage -Url $Url
    $details = ($page.AllElements | Where-Object {$_.Id -match "anons_details"}).innerHTML -split "<LI>"

    # Markup shared by most fields; the exceptions are spelled out below.
    $label = '<SPAN class=dane_anonsu_wyroznij>{0}</SPAN>'
    $value = '<SPAN class=dane_anonsu_value>(.*?)</SPAN>'
    $highlightValue = '<SPAN class="dane_anonsu_fiolet dane_anonsu_tytul">(.*?)</SPAN>'

    $city = Get-RoksaDetailValue -Details $details -Label ($label -f 'Miasto:') -ValuePattern ('<DIV class=dane_anonsu_sbox>{0}</DIV>' -f $highlightValue)
    $district = Get-RoksaDetailValue -Details $details -Label ($label -f 'Dzielnica:') -ValuePattern ('<DIV class=dane_anonsu_sbox>{0}</DIV>' -f $value)
    $outgoing = Get-RoksaDetailValue -Details $details -Label ($label -f 'Wyjazdy: ') -ValuePattern $value
    $age = Get-RoksaDetailValue -Details $details -Label ($label -f 'Wiek: ') -ValuePattern $value
    $weight = Get-RoksaDetailValue -Details $details -Label ($label -f 'Waga: ') -ValuePattern $value
    $height = Get-RoksaDetailValue -Details $details -Label ($label -f 'Wzrost: ') -ValuePattern $value
    $breasts = Get-RoksaDetailValue -Details $details -Label ($label -f 'Biust: ') -ValuePattern $value
    $languages = Get-RoksaDetailValue -Details $details -Label ($label -f 'Języki: ') -ValuePattern '<SPAN class=dane_anonsu_sbox>(.*?)</SPAN>'
    $cost1h = Get-RoksaDetailValue -Details $details -Label '<SPAN class="dane_anonsu_fiolet dane_anonsu_wyroznij dane_anonsu_tytul">1 godz.:</SPAN>' -ValuePattern $highlightValue
    $cost15min = Get-RoksaDetailValue -Details $details -Label ($label -f '15 min.:') -ValuePattern $value
    $cost30min = Get-RoksaDetailValue -Details $details -Label ($label -f '30 min.:') -ValuePattern $value
    $costAllNight = Get-RoksaDetailValue -Details $details -Label ($label -f 'Całą noc:') -ValuePattern $value

    # Every element whose class matches "tag" contributes one entry; the image
    # markup in front of the text is removed before joining.
    $services = ($page.AllElements | Where-Object {$_.class -match "tag"}).innerHTML
    $serviceText = $null
    if ($services) {
        $tickImage = '<IMG alt=V src="//img8.roksa.pl/10_Dodaj-Anons-ok.png">'
        $serviceText = [string](@($services | ForEach-Object { ([string]$_).Replace($tickImage, "") }) -join ", ")
    }

    # [pscustomobject] keeps the property order, so exported columns are stable.
    return [pscustomobject]@{
        # TrimEnd keeps the Id non-empty when the URL ends with a slash.
        Id          = $Url.TrimEnd("/").Split("/")[-1]
        Typ         = $Type
        Wojewodztwo = $State
        Miasto      = $city
        Dzielnica   = $district
        Wyjazdy     = $outgoing
        Wiek        = $age
        Waga        = $weight
        Wzrost      = $height
        Biust       = $breasts
        Jezyki      = $languages
        Godzina     = $cost1h
        Polgodziny  = $cost30min
        Kwadrans    = $cost15min
        Noc         = $costAllNight
        Uslugi      = $serviceText
    }
}
function Read-RoksaChoice {
    <#
    .SYNOPSIS
    Helper: prompts until the user enters a whole number from 0 to Count - 1.

    .PARAMETER Prompt
    Text shown by Read-Host.

    .PARAMETER Count
    Number of selectable entries.

    .OUTPUTS
    The chosen index as an integer.
    #>
    param(
        [string]$Prompt,
        [int]$Count
    )
    if ($Count -lt 1) {
        # Without entries the prompt could never be satisfied.
        throw "Brak pozycji do wyboru."
    }
    $choice = 0
    do {
        $answer = Read-Host $Prompt
        # TryParse rejects empty input, which a plain [int] cast would turn into 0.
        $valid = ([int]::TryParse($answer, [ref]$choice)) -and ($choice -ge 0) -and ($choice -lt $Count)
    } until ($valid)
    return $choice
}

function Get-RoksaData {
    <#
    .SYNOPSIS
    Collects announcement details for one type and state and exports them to CSV.

    .DESCRIPTION
    Asks interactively for the type and/or state when they are not supplied,
    gathers the announcement links, reads each announcement, prints the result
    as a table, writes "<type display name>-<state value>.csv" into Path and
    finally opens Path with Invoke-Item.

    .PARAMETER Url
    Address of the search page.

    .PARAMETER Path
    Folder that receives the CSV file.

    .PARAMETER Type
    Option value of the announcement type. Prompted for when omitted.

    .PARAMETER State
    Option value of the state. Prompted for when omitted.
    #>
    Param(
        [string]$Url = "https://www.roksa.pl/pl/szukaj/",
        [string]$Path = $env:USERPROFILE + "\Documents",
        [string]$Type,
        [string]$State
    )
    $page = Get-RoksaPage -Url $Url

    # Both lists are always loaded so display names can be resolved even when
    # -Type / -State were passed on the command line.
    $types = @(Get-RoksaAnnoucementTypes -Page $page)
    $states = @(Get-RoksaStates -Page $page)

    if(!$Type){
        Write-Host "Wybierz typ ogłoszeń, dla których chcesz pobrać dane wprowadzając przyporządkowany numer." -ForegroundColor Yellow
        # Out-Host shows the menu before the prompt and keeps it out of the output stream.
        $types | Format-Table -HideTableHeaders "ID", "DisplayName" | Out-Host
        $typeId = Read-RoksaChoice -Prompt "Wprowadź numer typu" -Count $types.Count
        $Type = $types[$typeId].Value
        $typeName = $types[$typeId].DisplayName
        Write-Output ("Wybrano typ ogłoszeń: {0}." -f $typeName)
    }
    else{
        $typeMatch = $types | Where-Object { [string]$_.Value -eq $Type } | Select-Object -First 1
        # Fall back to the raw value when the page does not list it.
        $typeName = if($typeMatch){ $typeMatch.DisplayName } else { $Type }
    }

    if(!$State){
        Write-Host "Wybierz województwo, dla którego chcesz pobrać dane wprowadzając przyporządkowany numer." -ForegroundColor Yellow
        $states | Format-Table -HideTableHeaders "ID", "DisplayName" | Out-Host
        $stateId = Read-RoksaChoice -Prompt "Wprowadź numer województwa" -Count $states.Count
        $State = $states[$stateId].Value
        $stateName = $states[$stateId].DisplayName
        Write-Output ("Wybrano województwo: {0}." -f $stateName)
    }
    else{
        $stateMatch = $states | Where-Object { [string]$_.Value -eq $State } | Select-Object -First 1
        $stateName = if($stateMatch){ $stateMatch.DisplayName } else { $State }
    }

    Write-Output "Rozpoczynam wyszukiwanie..."
    $allResults = @(Get-RoksaSearchResultsPage -Type $Type -State $State -Url $Url)

    # The search function writes status strings to its output next to the link
    # objects, so only items carrying an href are treated as announcements.
    $links = @($allResults | Where-Object { $_.href })
    foreach($item in $allResults){
        if(-not $item.href -and $item -isnot [string]){
            Write-Warning ("Problem z anonsem. Brak odwołania do strony. Pomijam element.")
        }
    }

    $activity = "Zbieranie informacji o anonsach"
    $count = $links.Count
    $timeStamp = Get-Date
    Write-Progress -Activity $activity -SecondsRemaining ($count*5) -PercentComplete 0
    $announcements = @(
        # Walk from the last link to the first, as before, but over valid
        # indexes only (Count - 1 down to 0).
        for($i = $count - 1; $i -ge 0; $i--){
            $done = $count - 1 - $i
            $elapsed = ((Get-Date) - $timeStamp).TotalSeconds
            Write-Progress -Activity $activity -Status ("Zbieranie informacji o anonsie {0}/{1}..." -f ($done + 1), $count) -PercentComplete (($done / $count) * 100) -SecondsRemaining (($elapsed / ($done + 1)) * ($i + 1))
            $adUrl = $links[$i].href
            # Only scheme-relative links ("//host/...") need the scheme added.
            if($adUrl -like "//*"){
                $adUrl = "https:{0}" -f $adUrl
            }
            Get-RoksaAnnouncementInfo -Url $adUrl -Type $typeName -State $stateName
        }
    )
    Write-Progress -Activity $activity -SecondsRemaining 0 -PercentComplete 100

    $announcements | Format-Table -AutoSize
    $csvPath = Join-Path -Path $Path -ChildPath ("{0}-{1}.csv" -f $typeName, $State)
    $announcements | Export-Csv -Path $csvPath -Delimiter ";" -NoTypeInformation -Encoding UTF8
    Invoke-Item $Path
}
# Script entry point: run the collection with default parameters; the type and
# state are asked for interactively.
Get-RoksaData
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment