Skip to content

Instantly share code, notes, and snippets.

@scitasy
Last active December 16, 2015 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scitasy/5443400 to your computer and use it in GitHub Desktop.
Save scitasy/5443400 to your computer and use it in GitHub Desktop.
Using Selenium to scrape the web: London Underground station names and coordinates, saved as JSON.
/**
 * Scrapes the OpenStreetMap wiki page listing London Underground stations with
 * Selenium and writes the first three cells of every table row (stored under the
 * keys "name", "latitude" and "longitude") to a JSON file.
 */
public class Main
{
    /**
     * Opens the wiki page in Firefox, collects one JSON object per table row and
     * writes the resulting array to {@code /Users/Scitasy/stations.json}.
     *
     * <p>The browser is always shut down, even when scraping fails, and the output
     * file is always closed, even when writing fails. An {@link IOException} while
     * writing is reported on standard error and does not propagate.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args)
    {
        // Array that receives one JSONObject per scraped station row
        JSONArray stations = new JSONArray();

        // Create the Selenium WebDriver object
        WebDriver driver = new FirefoxDriver();
        try
        {
            // Navigate to the station list page
            driver.get("http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations");

            // Find the table element
            WebElement table = driver.findElement(By.className("wikitable"));

            // NOTE(review): this XPath is absolute (starts with "/"), so it is presumably
            // resolved against the whole document rather than relative to `table`, and it
            // depends on the exact page layout. Confirm before switching to ".//tr".
            List<WebElement> rows = table.findElements(By.xpath("/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr"));

            for (WebElement row : rows)
            {
                // Data cells of the current row
                List<WebElement> cells = row.findElements(By.xpath("td"));

                // Three cells are read below, so skip rows that have fewer
                // (rows without any td cells, and malformed or partial rows).
                if (cells.size() < 3)
                {
                    continue;
                }

                JSONObject station = new JSONObject();
                station.put("name", cells.get(0).getText());
                station.put("latitude", cells.get(1).getText());
                station.put("longitude", cells.get(2).getText());
                stations.add(station);
            }
        }
        finally
        {
            // Always close the browser, otherwise a failed scrape leaves Firefox running
            driver.quit();
        }

        try
        {
            FileWriter writer = new FileWriter("/Users/Scitasy/stations.json");
            try
            {
                writer.write(stations.toJSONString());
            }
            finally
            {
                // close() also flushes; the finally block releases the handle if write() fails
                writer.close();
            }
        }
        catch (IOException e)
        {
            // Best effort: report the failure and exit normally
            e.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment