Skip to content

Instantly share code, notes, and snippets.

@scitasy
Created April 26, 2013 15:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scitasy/5468133 to your computer and use it in GitHub Desktop.
Save scitasy/5468133 to your computer and use it in GitHub Desktop.
A class that uses Selenium to scrape data from a webpage and store it in a JSON file. IS PRETTY NIFTY, Y'ALL.
/**
 * Scrapes the London Underground station table from the OpenStreetMap wiki with
 * Selenium and writes every station's name, latitude and longitude to a JSON file.
 * The latitude/longitude range covered by the scraped stations is printed to
 * standard output.
 */
public class HTMLtoJSON
{
    /** Page that holds the station table. */
    private static final String STATIONS_URL =
        "http://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations";

    /**
     * XPath used to collect the table rows.
     * NOTE(review): this path is absolute, so per the Selenium documentation the
     * search is evaluated against the whole document rather than relative to the
     * element it is invoked on. It is kept unchanged so the same rows are selected
     * as before; consider replacing it with a relative ".//tr" once it is confirmed
     * that the first "wikitable" element is the station table.
     */
    private static final String ROWS_XPATH = "/html/body/div[3]/div[2]/div[4]/table[2]/tbody/tr";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/scitasy/stations.json";

    /** Only this many leading characters of a coordinate cell are parsed. */
    private static final int COORDINATE_LENGTH = 7;

    /**
     * Runs the scrape and writes the JSON file.
     *
     * @param args optional; {@code args[0]} overrides the output file path,
     *             otherwise {@link #DEFAULT_OUTPUT_PATH} is used
     */
    public static void main(String[] args)
    {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;

        // Range of longitude (x) and latitude (y) seen so far. Starting from
        // +/- infinity means the first station always replaces both bounds,
        // so no sentinel value or "first row" special case is needed.
        double xMin = Double.POSITIVE_INFINITY;
        double xMax = Double.NEGATIVE_INFINITY;
        double yMin = Double.POSITIVE_INFINITY;
        double yMax = Double.NEGATIVE_INFINITY;

        // Every station is stored as one JSON object in this array
        JSONArray stations = new JSONArray();

        // Create the Selenium WebDriver object
        WebDriver driver = new FirefoxDriver();
        try
        {
            // Navigate to the subway locations page
            driver.get(STATIONS_URL);
            // Find the table element (throws if the page has no "wikitable")
            WebElement table = driver.findElement(By.className("wikitable"));
            // Collect the rows; see the note on ROWS_XPATH
            List<WebElement> rows = table.findElements(By.xpath(ROWS_XPATH));

            for (WebElement row : rows)
            {
                // Columns of this row
                List<WebElement> cells = row.findElements(By.xpath("td"));
                // Name, latitude and longitude are read from cells 0, 1 and 2;
                // rows with fewer <td> cells cannot supply them and are skipped.
                if (cells.size() < 3)
                {
                    continue;
                }

                String name = cells.get(0).getText();
                double lati;
                double longi;
                try
                {
                    lati = parseCoordinate(cells.get(1).getText());
                    longi = parseCoordinate(cells.get(2).getText());
                }
                catch (NumberFormatException e)
                {
                    // One malformed row should not abort the whole scrape
                    System.err.println("Skipping row with unparseable coordinates: " + name);
                    continue;
                }

                // Widen the range to include the new coordinates
                yMin = Math.min(yMin, lati);
                yMax = Math.max(yMax, lati);
                xMin = Math.min(xMin, longi);
                xMax = Math.max(xMax, longi);

                // Store the station info in a JSON object and add it to the array
                JSONObject object = new JSONObject();
                object.put("name", name);
                object.put("latitude", lati);
                object.put("longitude", longi);
                stations.add(object);
            }
        }
        finally
        {
            // Always shut the browser down, even when scraping fails
            driver.quit();
        }

        // The bounds are only meaningful once at least one station was read
        if (yMin <= yMax)
        {
            System.out.println("Latitude range: " + yMin + " to " + yMax);
            System.out.println("Longitude range: " + xMin + " to " + xMax);
        }

        // Create a new JSON file and write the data to it
        try
        {
            FileWriter file = new FileWriter(outputPath);
            try
            {
                file.write(stations.toJSONString());
                file.flush();
            }
            finally
            {
                // Release the file handle even if the write fails
                file.close();
            }
        }
        catch (IOException e)
        {
            System.err.println("Could not write station data to " + outputPath);
            e.printStackTrace();
        }
    }

    /**
     * Parses a coordinate from the text of a table cell, looking only at the
     * first {@link #COORDINATE_LENGTH} characters (or the whole text if shorter).
     *
     * @param cellText visible text of the cell
     * @return the parsed coordinate
     * @throws NumberFormatException if the leading characters are not a number
     */
    private static double parseCoordinate(String cellText)
    {
        int end = Math.min(COORDINATE_LENGTH, cellText.length());
        return Double.parseDouble(cellText.substring(0, end));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment