Created
April 26, 2013 15:29
-
-
Save scitasy/5468133 to your computer and use it in GitHub Desktop.
A class that uses Selenium to scrape data from a webpage and store it in a JSON file. IS PRETTY NIFTY, Y'ALL.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HTMLtoJSON | |
{ | |
public static void main(String[] args) | |
{ | |
//I need to know the range of long and lat for a project I'm working on | |
//so I declare variables that I will use later to store the info | |
double xMin = -100; | |
double xMax = -100; | |
double yMin = -100; | |
double yMax = -100; | |
//Create the Selenium WebDriver object | |
WebDriver driver = new FirefoxDriver(); | |
//Navigate to the subway locations page | |
driver.get("http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations"); | |
//Find and store the table element | |
WebElement table = driver.findElement(By.className("wikitable")); | |
//Store all the rows in the table | |
List<WebElement> list = table.findElements(By.xpath("/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr")); | |
//Create the JSONArray each JSONObject will be stored in | |
JSONArray stations = new JSONArray(); | |
//Go through each row in the table | |
for(int x = 0; x<list.size(); x++) | |
{ | |
//Store the columns of each row in a new list | |
List<WebElement> newList = list.get(x).findElements(By.xpath("td")); | |
//Create a new JSONObject to store the station details in | |
JSONObject object = new JSONObject(); | |
if(newList.size()>0) | |
{ | |
//Store the lat and long coords | |
double lati = Double.parseDouble(newList.get(1).getText().substring(0,7)); | |
double longi = Double.parseDouble(newList.get(2).getText().substring(0,7)); | |
//If this is the first we store coords, set them as the range | |
if(yMin==-100 && yMax==-100 && xMin==-100 && xMax == -100) | |
{ | |
yMin = lati; | |
yMax = lati; | |
xMin = longi; | |
xMax = longi; | |
} | |
//If the new coords are less/greated than the min or mix, save accordingly | |
if(lati<yMin) | |
{ | |
yMin = lati; | |
} | |
if(lati>yMax) | |
{ | |
yMax = lati; | |
} | |
if(longi<xMin) | |
{ | |
xMin = longi; | |
} | |
if(longi>xMax) | |
{ | |
xMax = longi; | |
} | |
//Store the station info in a JSON object | |
object.put("name", newList.get(0).getText()); | |
object.put("latitude", lati); | |
object.put("longitude", longi); | |
//Add the JSON object to our JSONArray | |
stations.add(object); | |
} | |
} | |
//Hit it and quit it. I'M NOT A DOUCHE. | |
driver.quit(); | |
//Create a jew JSON file and write the data to it | |
try { | |
FileWriter file = new FileWriter("/Users/scitasy/stations.json"); | |
file.write(stations.toJSONString()); | |
file.flush(); | |
file.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment