public class TimeTable { | |
List<String> week = new ArrayList<>(); | |
List<String> saturday = new ArrayList<>(); | |
List<String> sunday = new ArrayList<>(); | |
List<String> nextRides; | |
public List<String> collectMinutes(String line, String hour) { | |
return Arrays.asList(line.split(" ")).stream() | |
.filter(s -> !s.trim().isEmpty()).map(s -> hour + ":" + s.trim()).collect(Collectors.toList()); | |
} | |
public void addHourLine(List<String> line) { | |
String hour = line.get(0).replace("“", ""); | |
week.addAll(collectMinutes(line.get(1), hour)); | |
saturday.addAll(collectMinutes(line.get(2), hour)); | |
sunday.addAll(collectMinutes(line.get(3), hour)); | |
} | |
public TimeTable fetchTimeTable(Stop stop) throws Exception { | |
String url = URL_CONST + "/?ligne=" + stop.getLine() + "&head=" + stop.getHead().replaceAll("\\s", "%20") + "%7C" + stop.getLine() + "&arret=" + stop.getStopId(); | |
Document doc = Jsoup.connect(url).timeout(timeTableConnectionTimeout).get(); | |
String imageUrl = doc.select("#target_plan").attr("src").replaceAll(" ", "%20"); | |
TimeTable tt = new TimeTable(); | |
Tesseract instance = Tesseract.getInstance(); | |
try { | |
instance.setPageSegMode(6); // Assume a single uniform block of text. | |
String result = instance.doOCR(ImageIO.read(new URL(imageUrl))); | |
System.out.println("OCR Ok"); | |
Arrays.asList(result.split("\n")) | |
.stream() | |
.filter(s -> s.contains("“")) // keep only time table entry | |
.map(s -> s.replaceAll("O", "0") + " ") // replace incorrectly detected zeros | |
.map(s -> Arrays.asList((s.split(" ")[0] + s.replaceAll(s.split(" ")[0], ";")).split(";"))) // accumulate hour and minutes | |
.forEach(line -> tt.addHourLine(line)); // add line to timetable | |
} catch (TesseractException e) { | |
// Please HELP | |
} | |
return tt; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment