Skip to content

Instantly share code, notes, and snippets.

@scitasy
Last active December 16, 2015 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scitasy/5468175 to your computer and use it in GitHub Desktop.
Save scitasy/5468175 to your computer and use it in GitHub Desktop.
Code that scrapes data from an HTML page using Selenium and stores it as a GeoJSON object, which, it turns out, is just a JSON object with a particular format. Just FYI, because I didn't know that.
/**
 * Scrapes a table of stations from an OpenStreetMap wiki page with Selenium and
 * writes the rows out as a GeoJSON {@code FeatureCollection} of {@code Point}
 * features, one per table row.
 */
public class HTMLtoGeoJSON
{
    /** Page that is loaded and scraped. */
    private static final String STATIONS_URL =
            "http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations";

    /** XPath selecting the table rows to scrape. */
    private static final String ROWS_XPATH = "/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/scitasy/geostations.json";

    /** Coordinate cells are cut to at most this many leading characters before parsing. */
    private static final int MAX_COORD_CHARS = 7;

    /** A usable row needs a name cell plus two coordinate cells. */
    private static final int MIN_CELLS_PER_ROW = 3;

    /**
     * Loads the station page in Firefox, converts the table to GeoJSON and writes it to disk.
     *
     * @param args optional; {@code args[0]} overrides the output file path
     *             (defaults to {@link #DEFAULT_OUTPUT_PATH})
     */
    public static void main(String[] args)
    {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;

        JSONObject geoObject;
        //Create the Selenium WebDriver object
        WebDriver driver = new FirefoxDriver();
        try
        {
            geoObject = scrapeStations(driver);
        }
        finally
        {
            //Always shut the browser down, even when scraping throws
            driver.quit();
        }

        writeJson(geoObject, outputPath);
    }

    /**
     * Navigates to the station page and builds the GeoJSON FeatureCollection from its table rows.
     *
     * @param driver the browser session to drive; not closed by this method
     * @return a JSON object with {@code "type": "FeatureCollection"} and a {@code "features"} array
     */
    private static JSONObject scrapeStations(WebDriver driver)
    {
        //Navigate to the subway locations page
        driver.get(STATIONS_URL);
        //Find the table element
        WebElement table = driver.findElement(By.className("wikitable"));
        //NOTE(review): ROWS_XPATH starts with "/", so it is presumably evaluated from the
        //document root rather than relative to `table` - confirm which table is intended
        //before replacing it with a relative path such as "./tbody/tr".
        List<WebElement> rows = table.findElements(By.xpath(ROWS_XPATH));

        //Collect one feature per data row
        JSONArray stations = new JSONArray();
        for (WebElement row : rows)
        {
            List<WebElement> cells = row.findElements(By.xpath("td"));
            //Rows without enough td cells (e.g. the header row, which yields none) carry no station data
            if (cells.size() < MIN_CELLS_PER_ROW)
            {
                continue;
            }
            stations.add(toFeature(cells));
        }

        JSONObject geoObject = new JSONObject();
        geoObject.put("type", "FeatureCollection");
        geoObject.put("features", stations);
        return geoObject;
    }

    /**
     * Converts one table row into a GeoJSON Feature with a Point geometry.
     * Cell 0 supplies the name; cells 1 and 2 supply the two coordinates, in that order.
     *
     * @param cells the td cells of the row; must contain at least three entries
     * @return the Feature object for the row
     * @throws NumberFormatException if a coordinate cell does not start with a parsable number
     */
    private static JSONObject toFeature(List<WebElement> cells)
    {
        //NOTE(review): GeoJSON positions are ordered [longitude, latitude]; confirm that
        //column 1 / column 2 of the page match that order - not verifiable from this file.
        JSONArray coords = new JSONArray();
        coords.add(parseCoordinate(cells.get(1).getText()));
        coords.add(parseCoordinate(cells.get(2).getText()));

        JSONObject geometry = new JSONObject();
        geometry.put("type", "Point");
        geometry.put("coordinates", coords);

        JSONObject properties = new JSONObject();
        properties.put("name", cells.get(0).getText());

        JSONObject feature = new JSONObject();
        feature.put("type", "Feature");
        feature.put("geometry", geometry);
        feature.put("properties", properties);
        return feature;
    }

    /**
     * Parses the leading characters of a coordinate cell as a double.
     * At most {@link #MAX_COORD_CHARS} characters are used; shorter text is parsed whole
     * instead of failing on the substring bounds.
     *
     * @param text the cell text
     * @return the parsed coordinate
     * @throws NumberFormatException if the leading characters are not a valid number
     */
    private static double parseCoordinate(String text)
    {
        int end = Math.min(MAX_COORD_CHARS, text.length());
        return Double.parseDouble(text.substring(0, end));
    }

    /**
     * Writes the JSON text of {@code geoObject} to {@code outputPath}.
     * I/O failures are reported on standard error and do not propagate.
     *
     * @param geoObject  the object to serialise
     * @param outputPath destination file path
     */
    private static void writeJson(JSONObject geoObject, String outputPath)
    {
        try
        {
            FileWriter file = new FileWriter(outputPath);
            try
            {
                file.write(geoObject.toJSONString());
                file.flush();
            }
            finally
            {
                //Release the file handle even if the write fails
                file.close();
            }
        }
        catch (IOException e)
        {
            System.err.println("Could not write GeoJSON to " + outputPath);
            e.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment