Last active
December 16, 2015 17:09
-
-
Save scitasy/5468175 to your computer and use it in GitHub Desktop.
Code that scrapes data from an HTML page using Selenium and stores it as a GeoJSON object (which, it turns out, is just a JSON object with a particular format — I didn't know that, so FYI).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HTMLtoGeoJSON | |
{ | |
public static void main(String[] args) | |
{ | |
//Create the Selenium WebDriver object | |
WebDriver driver = new FirefoxDriver(); | |
//Navigate to the subway locations page | |
driver.get("http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations"); | |
//Find and store the table element | |
WebElement table = driver.findElement(By.className("wikitable")); | |
//Store all the rows in the table | |
List<WebElement> list = table.findElements(By.xpath("/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr")); | |
//Create the JSONObject the geo data will be stored in | |
JSONObject geoObject = new JSONObject(); | |
//Create the JSONArray that will hold the geo data | |
JSONArray stations = new JSONArray(); | |
//Go through each row in the table | |
for(int x = 0; x<list.size(); x++) | |
{ | |
//Store the columns of each row in a new list | |
List<WebElement> newList = list.get(x).findElements(By.xpath("td")); | |
//Create a new JSONObject to store the station details in | |
JSONObject object = new JSONObject(); | |
//Because the first row of the table returns no data, not including this if kills the program. | |
//Lame, I know. | |
if(newList.size()>0) | |
{ | |
//Define the station object | |
object.put("type", "Feature"); | |
//Create the geometry object | |
JSONObject geometry = new JSONObject(); | |
//Add the geometry properties | |
geometry.put("type", "Point"); | |
//Create a JSONArray for the coordinates | |
JSONArray coords = new JSONArray(); | |
//Add the coordinates to the array | |
coords.add(Double.parseDouble(newList.get(1).getText().substring(0,7))); | |
coords.add(Double.parseDouble(newList.get(2).getText().substring(0,7))); | |
//Add the coords array to the geometry object | |
geometry.put("coordinates", coords); | |
//Add the geometry object to the station | |
object.put("geometry", geometry); | |
//Create the properties object | |
JSONObject properties = new JSONObject(); | |
properties.put("name", newList.get(0).getText()); | |
//Add the properties object to the station | |
object.put("properties", properties); | |
stations.add(object); | |
} | |
} | |
geoObject.put("features", stations); | |
//Define the type of the GeoJSON object | |
geoObject.put("type", "FeatureCollection"); | |
//Hit it and quit it. I'M NOT A DOUCHE. | |
driver.quit(); | |
//Create the JSON file and write the data to it | |
try { | |
FileWriter file = new FileWriter("/Users/scitasy/geostations.json"); | |
file.write(geoObject.toJSONString()); | |
file.flush(); | |
file.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment