Skip to content

Instantly share code, notes, and snippets.

@dhaeb dhaeb/scrape.groovy

Created Oct 8, 2017
Embed
What would you like to do?
Scraping Bundestagswahlergebnis Ost/West
@Grapes(
@Grab(group='net.sourceforge.htmlunit', module='htmlunit', version='2.27')
)
import com.gargoylesoftware.htmlunit.WebClient
import com.gargoylesoftware.htmlunit.html.HtmlPage
// Maps a federal-state name to the region label used for aggregation:
// returns "Osten" when the name is one of the six states listed below,
// and "Westen" for every other value.
def isOsten = { bl ->
    final eastStates = ["Sachsen", "Thüringen", "Sachsen-Anhalt", "Mecklenburg-Vorpommern", "Berlin", "Brandenburg"]
    eastStates.contains(bl) ? "Osten" : "Westen"
}
// Fetches the 16 per-state result pages (land-1 .. land-16) of the 2017 Bundestag
// election from bundeswahlleiter.de and extracts one row per state.
// Each entry of `data` is: [state name, AfD vote count, "Osten"/"Westen", AfD percentage].
final WebClient webClient = new WebClient()
// Loop-invariant option: set once instead of on every iteration.
webClient.getOptions().setJavaScriptEnabled(false)

// Returns the trimmed text of the first node matching `xpath` on `page`.
// Fails with a descriptive IllegalStateException (instead of an opaque null
// dereference) when the XPath matches nothing, e.g. after a page layout change.
def firstText = { HtmlPage page, String xpath ->
    def nodes = page.getByXPath(xpath)
    if (!nodes) {
        throw new IllegalStateException("No element matches '${xpath}' on ${page.getUrl()}")
    }
    nodes[0].asText().trim()
}

def data
try {
    data = (1..16).collect { it ->
        println(it) // progress output: index of the state page being fetched
        final HtmlPage page = webClient.getPage("https://www.bundeswahlleiter.de/bundestagswahlen/2017/ergebnisse/bund-99/land-${it}.html")
        // Original author's reference XPath: //*[@id="table-8391"]/tbody[2]/tr[4]/td[2]/span
        // Per the original notes, this cell holds the AfD second votes (Zweitstimmen);
        // "." is stripped because it is used as the thousands separator.
        def afdStimmen = Integer.parseInt(firstText(page, '//table/tbody[2]/tr[4]/td[5]').replaceAll("\\.", ""))
        // Per the original notes, this cell holds the AfD second-vote percentage in the
        // state; the decimal comma is converted to a decimal point before parsing.
        def afdPercent = Double.parseDouble(firstText(page, '//table/tbody[2]/tr[4]/td[6]').replaceAll(",", "."))
        // State name as shown in the page's navigation button.
        def bl = firstText(page, '//*[@id="main"]/nav/ul/li[2]/div[1]/div/div/button/span')
        [bl, afdStimmen, isOsten(bl), afdPercent]
    }
} finally {
    // Release the client's resources even if a fetch or parse fails.
    webClient.close()
}
// Print the raw per-state rows: [state name, votes, region label, percentage].
println(data)
// Sum the vote counts (row index 1) per region label (row index 2).
def totals = data.inject([:]) { acc, e ->
    def ow = e[2]
    // Groovy's Map.get(key, default) yields 0 the first time a region is seen.
    acc[ow] = acc.get(ow, 0) + e[1]
    acc
}
// Emit the aggregated totals explicitly; previously the result was only the
// script's (unprinted) final expression value.
println(totals)
// Keep the totals map as the script's result value, as before.
totals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.