Last active
December 16, 2015 13:39
-
-
Save scitasy/5443400 to your computer and use it in GitHub Desktop.
Using Selenium to scrape London Underground station names and coordinates from the OpenStreetMap wiki into a JSON file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class Main | |
{ | |
public static void main(String[] args) | |
{ | |
//Create the Selenium WebDriver object | |
WebDriver driver = new FirefoxDriver(); | |
//Navigate to the subway locations page | |
driver.get("http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations"); | |
//Find and store the table element | |
WebElement table = driver.findElement(By.className("wikitable")); | |
//Store all the rows in the table | |
List<WebElement> list = table.findElements(By.xpath("/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr")); | |
//Create the JSONArray each JSONObject will be stored in | |
JSONArray stations = new JSONArray(); | |
//Go through each row in the table | |
for(int x = 0; x<list.size(); x++) | |
{ | |
//Store the columns of each row in a new list | |
List<WebElement> newList = list.get(x).findElements(By.xpath("td")); | |
//Create a new JSONObject to store the station details in | |
JSONObject object = new JSONObject(); | |
if(newList.size()>0) | |
{ | |
object.put("name", newList.get(0).getText()); | |
object.put("latitude", newList.get(1).getText()); | |
object.put("longitude", newList.get(2).getText()); | |
stations.add(object); | |
} | |
} | |
//Hit it and quit it. I'M NOT A DOUCHE. | |
driver.quit(); | |
try { | |
FileWriter file = new FileWriter("/Users/Scitasy/stations.json"); | |
file.write(stations.toJSONString()); | |
file.flush(); | |
file.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment