Created
June 15, 2017 14:27
-
-
Save EvanKnowles/e70032db8d327ad0194ef2c647ebbbeb to your computer and use it in GitHub Desktop.
Testing the extract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package za.co.knonchalant.espn; | |
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
public class TestExtract { | |
@Test | |
public void test() throws IOException { | |
Document doc = Jsoup.connect("http://kwese.espn.com/football/table/_/league/eng.1/season/2016").get(); | |
Elements table = doc.select("table"); | |
Elements headers = table.select("thead th"); | |
String year = headers.get(0).text(); | |
System.out.println("Year: " + year); | |
List<String> columnHeaders = new ArrayList<>(); | |
for (int i = 1; i < headers.size(); i++) { | |
columnHeaders.add(headers.get(i).text()); | |
} | |
System.out.println(columnHeaders); | |
Elements select = table.select("tbody tr"); | |
Map<String, List<String>> teams = new HashMap<>(); | |
String teamName = null; | |
for (int i = 0; i < select.size(); i++) { | |
Element element = select.get(i); | |
Elements cols = element.select("td"); | |
for (int j = 0; j < cols.size(); j++) { | |
Element col = cols.get(j); | |
if (j == 0) { | |
teamName = col.select(".team-names").text(); | |
teams.put(teamName, new ArrayList<String>()); | |
} else { | |
teams.get(teamName).add(col.text()); | |
} | |
} | |
} | |
//System.out.println(teams); | |
StringBuilder result = new StringBuilder(new StringBuilder().append("<standings>\n\t").append("<year>").append(year).append("</year>\n<teams>\n\t").toString()); | |
for (String team : teams.keySet()){ | |
result.append("<team>\n\t\t<name>").append(team).append("</name>\n"); | |
List<String> cols = teams.get(team); | |
for (int i = 0; i < columnHeaders.size(); i++) { | |
result.append(String.format("\t\t<%s>%s</%s>\n", columnHeaders.get(i), cols.get(i), columnHeaders.get(i))); | |
} | |
result.append("\t</team>\n\t"); | |
} | |
result.append("</teams></standings>"); | |
System.out.println(result.toString()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is great!!! At least I have something to work with and I can modify it further.
Thank you very much Mr Knowles!