Skip to content

Instantly share code, notes, and snippets.

@eliask
Created January 10, 2020 06:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eliask/3e932469bd29c14dc61d87b1cd0defb8 to your computer and use it in GitHub Desktop.
Save eliask/3e932469bd29c14dc61d87b1cd0defb8 to your computer and use it in GitHub Desktop.
Scrape Vesla measurements: https://wwwp2.ymparisto.fi/vesla/
import os, glob, sys
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support import expected_conditions as EC
# --- Command line ------------------------------------------------------------
# Two positional arguments are required: the Vesla username and password.
if len(sys.argv) < 3:
    # sys.exit(<str>) prints the message to stderr and exits with status 1,
    # so a missing argument is reported as a failure rather than success.
    sys.exit(f'Usage {sys.argv[0]} <username> <password>')
username, password = sys.argv[1:3]

# --- Firefox profile ---------------------------------------------------------
profile = webdriver.FirefoxProfile()
# Exports are saved to the user's Downloads folder; dl_stuff() globs this
# directory to detect new files. expanduser() also resolves the home
# directory when $HOME is not set.
dir_ = os.path.join(os.path.expanduser('~'), 'Downloads')
os.makedirs(dir_, exist_ok=True)
# folderList=2 makes Firefox honour browser.download.dir; without it the
# custom download directory preference is ignored.
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.dir", dir_)
# Save CSV responses straight to disk instead of showing a download prompt.
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
# Kept from the original script: ensures ./vesla-out exists, although the
# downloads themselves land in dir_.
os.makedirs('vesla-out', exist_ok=True)

# --- Log in ------------------------------------------------------------------
# `driver` and `browser` are two names for the same WebDriver instance; both
# are used further down in the file.
driver = browser = webdriver.Firefox(firefox_profile=profile)
browser.get('https://wwwp2.ymparisto.fi/scripts/kirjaudu.asp')
# find_element(By.NAME, ...) replaces the find_element_by_name helper that
# newer Selenium releases no longer provide.
browser.find_element(By.NAME, "userName").send_keys(username)
browser.find_element(By.NAME, "password").send_keys(password + Keys.RETURN)
# Marked "Required" by the original author; presumably lets the login request
# finish before navigating away -- TODO replace with an explicit wait.
time.sleep(1)
browser.get('https://wwwp2.ymparisto.fi/scripts/hearts/welcome.asp')
# Municipality ("kunta") identifiers to scrape, kept as strings in the order
# they are processed. Each one is matched against an <option value="..."> in
# the Vesla search form by dl_stuff().
kunnat = """
20 5 9 10 16 18 19 35 43 46 47 49 50 51 52 60 61 62 65 69 71 72 74 75
76 77 78 79 81 82 86 111 90 91 97 98 99 102 103 105 106 108 109 139 140
142 143 145 146 153 148 149 151 152 165 167 169 170 171 172 176 177 178
179 181 182 186 202 204 205 208 211 213 214 216 217 218 224 226 230 231
232 233 235 236 239 240 320 241 322 244 245 249 250 256 257 260 261 263
265 271 272 273 275 276 280 284 285 286 287 288 290 291 295 297 300 301
304 305 312 316 317 318 398 399 400 407 402 403 405 408 410 416 417 418
420 421 422 423 425 426 444 430 433 434 435 436 438 440 441 475 478 480
481 483 484 489 491 494 495 498 499 500 503 504 505 508 507 529 531 535
536 538 541 543 545 560 561 562 563 564 309 576 577 578 445 580 581 599
583 854 584 588 592 593 595 598 601 604 607 608 609 611 638 614 615 616
619 620 623 624 625 626 630 631 635 636 678 710 680 681 683 684 686 687
689 691 694 697 698 700 702 704 707 729 732 734 736 790 738 739 740 742
743 746 747 748 791 749 751 753 755 758 759 761 762 765 766 768 771 777
778 781 783 831 832 833 834 837 844 845 846 848 849 850 851 853 857 858
859 886 887 889 890 892 893 895 785 905 908 911 92 915 918 921 922 924
925 927 931 934 935 936 941 946 976 977 980 981 989 992
""".split()  # split() with no arguments already ignores the surrounding newlines
# Shared explicit wait on the module-level driver with a 10 second timeout.
wait = WebDriverWait(driver, 10)


def wait_find_elem(selector):
    """Wait for an element matching a CSS selector and return it.

    Args:
        selector: CSS selector string identifying the element.

    Returns:
        The WebElement located by the wait condition.

    Raises:
        selenium.common.exceptions.TimeoutException: if no matching element
            is present before the shared ``wait`` times out.
    """
    # until() returns the value produced by the condition, which for
    # presence_of_element_located is the element itself, so a second lookup
    # (and the chance of the element vanishing in between) is unnecessary.
    return wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
def dl_stuff(kunta):
    """Export the Vesla statistics CSV for a single municipality.

    Drives the module-level ``browser`` through the search form: filters on
    the municipality whose ``<option>`` value equals *kunta*, opens the
    result view, selects all rows and requests the "sas" export with every
    period, season, layer and quantity selected. It then polls ``dir_`` for
    up to 600 seconds until a new ``Stats_*.csv`` file shows up.

    Args:
        kunta: Municipality identifier as it appears in the form's
            ``<option value="...">`` attribute.

    Returns:
        None. Returns early, without exporting, when the result title reads
        'Hakutulos: 0 paikkaa' (zero places found). A download that does not
        appear within the time limit is reported on stderr.
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of the file.
    from selenium.common.exceptions import WebDriverException

    browser.get('https://wwwp2.ymparisto.fi/vesla/Common/rules/SearchRules.aspx')

    # Reset a previous selection, if any. This is best-effort: the button may
    # be missing or not clickable, which is fine. Only Selenium errors are
    # ignored so that Ctrl-C and programming errors still surface.
    try:
        browser.find_element(
            By.CSS_SELECTOR, '#ContentPlaceHolder1_btnRemove1').click()
    except WebDriverException:
        pass

    # Open the first rule group and pick its fifth child entry.
    browser.find_element(
        By.CSS_SELECTOR, '#ContentPlaceHolder1_rptSelect_btnRule_0').click()
    wait_find_elem('#ContentPlaceHolder1_rptSelect_pnlRules_0 :nth-child(5)').click()

    # Clear whatever is currently in the selection list.
    wait_find_elem('#ContentPlaceHolder1_SelectControl1_btnRemoveAll').click()
    time.sleep(0.5)

    # Select the requested municipality and move it to the selected list.
    wait_find_elem(f'option[value="{kunta}"]').click()
    time.sleep(0.5)
    wait_find_elem('#ContentPlaceHolder1_SelectControl1_btnAdd').click()
    # Block until the selected list actually contains an option.
    wait_find_elem('#ContentPlaceHolder1_SelectControl1_lstSelected option')

    # Accept the filter.
    browser.find_element(
        By.CSS_SELECTOR, '#ContentPlaceHolder1_btnSelect').click()

    # Main view -> go to results.
    wait_find_elem('#ContentPlaceHolder1_Button0server').click()

    # Tick "select all" unless it is already ticked. The fixed sleeps are
    # kept from the original script; presumably the page needs time to
    # settle -- TODO replace with explicit waits.
    time.sleep(1.0)
    select_all = wait_find_elem('#ContentPlaceHolder1_chkSelectAll')
    if not select_all.is_selected():
        select_all.click()
    time.sleep(1.5)

    # No data: skip this municipality.
    if wait_find_elem('#ContentPlaceHolder1_lblTitle').text == 'Hakutulos: 0 paikkaa':
        return

    # Choose the export option whose value is "sas" (the original comment
    # calls this "Export to Excel").
    wait_find_elem('[value="sas"]').click()

    # For each export dimension: click its "send all" button, then wait until
    # the corresponding selected list holds at least one option.
    export_dimensions = (
        ('SendRightAllPeriod', 'SelectedDatePeriods'),
        ('SendRightAllSeason', 'SelectedSeasons'),
        ('SendRightAllLayer', 'SelectedLayers'),
        ('SendRightAllQuantity', 'SelectedQuantities'),
    )
    for send_all_id, selected_id in export_dimensions:
        wait_find_elem(f'#ContentPlaceHolder1_{send_all_id}').click()
        wait_find_elem(f'#ContentPlaceHolder1_{selected_id} option')

    # Snapshot the existing exports so the new file can be recognised by name.
    pattern = f'{dir_}/Stats_*.csv'
    existing = set(glob.glob(pattern))
    wait_find_elem('#ContentPlaceHolder1_btnSave').click()

    # Poll once per second, for at most 600 seconds, for a new export file.
    # NOTE(review): a file appearing does not by itself prove the download
    # has finished writing -- confirm whether that matters for large exports.
    for _ in range(600):
        time.sleep(1)
        if set(glob.glob(pattern)) - existing:
            break
    else:
        # Previously this case fell through silently; make it visible.
        print(f'Timed out waiting for the download for municipality: {kunta}',
              file=sys.stderr)
# Run the export for every municipality once. `visited` remembers the ones
# already handled so a code repeated in `kunnat` is not scraped twice.
visited = []
for kunta in kunnat:
    if kunta not in visited:
        dl_stuff(kunta)
        visited.append(kunta)
        print(f'Downloaded data for municipality: {kunta}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment