Skip to content

Instantly share code, notes, and snippets.

@doyonghoon
Created October 5, 2015 07:07
Show Gist options
  • Save doyonghoon/2ad751098907ab3f061d to your computer and use it in GitHub Desktop.
Save doyonghoon/2ad751098907ab3f061d to your computer and use it in GitHub Desktop.
sample parser
package dyh.parser;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import util.WLog;
/**
 * Created by Spencer Do on 2015. 10. 5.
 *
 * <p>Reads an HTML file, takes the text of its {@code <body>}, cuts that text into rows at every
 * run of {@link #NUMBER_OF_HYPHENS} hyphens, and logs the course codes found in each row that
 * contains a {@code |} column separator.
 *
 * <p>Sample input page:
 * http://www.assist.org/cgi-bin/REPORT_2/Rep2.pl?aay=15-16&dora=EECS&oia=UCB&ay=15-16&event=19&ria=UCB&agreement=aa&ia=DAC&sia=DAC&dir=1&&sidebar=false&rinst=left&mver=2&kind=5&dt=2
 */
public class Parser {

  /** Number of consecutive hyphens that separates one row from the next in the body text. */
  private static final int NUMBER_OF_HYPHENS = 80;

  /**
   * Resolves a classpath resource to a {@link File}.
   *
   * @param fileName resource name, looked up through this class's class loader
   * @return a {@code File} built from the path component of the resource URL
   * @throws IllegalArgumentException if no resource with that name is on the classpath
   */
  public File getFile(String fileName) {
    // Get file from resources folder
    ClassLoader classLoader = getClass().getClassLoader();
    java.net.URL resource = classLoader.getResource(fileName);
    if (resource == null) {
      // getResource signals "not found" with null; fail with a message instead of a bare NPE.
      throw new IllegalArgumentException("Resource not found on classpath: " + fileName);
    }
    return new File(resource.getFile());
  }

  /**
   * Parses the given HTML file as UTF-8 and logs the course codes of every row in its body.
   *
   * <p>An {@link IOException} raised while reading the file is printed to standard error and the
   * method returns without logging any rows.
   *
   * @param in HTML file to read
   */
  public void parse(File in) {
    try {
      Document d = Jsoup.parse(in, "UTF-8");
      String body = d.getElementsByTag("body").get(0).text();
      for (String row : getRows(body)) {
        parseCourses(row);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Extracts the course codes from one row and logs them.
   *
   * <p>The row is split into lines and each line into {@code |}-separated cells. The first
   * non-empty code found becomes the row's {@code code}; every later non-empty code is collected
   * into the course list.
   *
   * @param row text of a single row, as produced by {@link #getRows(String)}
   */
  private void parseCourses(String row) {
    String code = null;
    List<String> collegeCourses = new ArrayList<>();
    for (String line : row.split("\n")) {
      for (String cell : line.split("\\|")) {
        String value = getCourseCode(cell.trim());
        if (StringUtils.isEmpty(code)) {
          code = value;
        } else if (!StringUtils.isEmpty(value)) {
          collegeCourses.add(value);
        }
      }
    }
    WLog.i("code: " + code + ", courses: " + collegeCourses);
  }

  /**
   * Returns the leading part of a cell that precedes its first title-cased word.
   *
   * <p>The first lowercase ASCII letter is located; the result is everything up to two characters
   * before it, trimmed. For example {@code "MATH 1A Calculus"} yields {@code "MATH 1A"}: the first
   * lowercase letter is the {@code a} of "Calculus", and stepping back two characters drops the
   * capital {@code C} and the space in front of it.
   *
   * @param raw trimmed cell text, may be null or empty
   * @return the extracted code, or {@code null} when the input is empty, contains no lowercase
   *     letter, or its first lowercase letter sits at index 2 or lower
   */
  private String getCourseCode(String raw) {
    if (StringUtils.isEmpty(raw)) {
      return null;
    }
    int index = 0;
    // charAt avoids copying the whole string into a new array on every iteration.
    for (int i = 0; i < raw.length(); i++) {
      char c = raw.charAt(i);
      if (c >= 'a' && c <= 'z') {
        index = i;
        break;
      }
    }
    if (index > 2) {
      return raw.substring(0, index - 2).trim();
    }
    return null;
  }

  /**
   * Splits the body text into rows at each run of {@link #NUMBER_OF_HYPHENS} hyphens.
   *
   * @param body full text of the document body
   * @return the trimmed chunks that contain at least one {@code |}; chunks without one are dropped
   */
  private String[] getRows(String body) {
    List<String> result = new ArrayList<>();
    // A run of plain hyphens has no special meaning as a regex, so it is safe as a split pattern.
    for (String chunk : body.split(createHyphen(NUMBER_OF_HYPHENS))) {
      if (chunk.contains("|")) {
        result.add(chunk.trim());
      }
    }
    return result.toArray(new String[0]);
  }

  /**
   * Builds a string consisting of {@code length} hyphens.
   *
   * @param length number of hyphens; zero or a negative value yields the empty string
   * @return the hyphen string
   */
  private String createHyphen(int length) {
    StringBuilder result = new StringBuilder(Math.max(length, 0));
    for (int i = 0; i < length; i++) {
      result.append('-');
    }
    return result.toString();
  }

  /**
   * Splits a line into its {@code |}-separated cells.
   *
   * @param line a single line of a row
   * @return the cells, untrimmed
   */
  private String[] getCourses(String line) {
    return line.split("\\|");
  }

  /**
   * Tells whether the whole string is a parenthesised token such as {@code "(3)"}.
   *
   * <p>{@link String#matches(String)} anchors at both ends, so text before the opening or after
   * the closing parenthesis makes this return {@code false}.
   *
   * @param line text to test
   * @return {@code true} if {@code line} starts with {@code (} and ends with {@code )}
   */
  private boolean hasUnits(String line) {
    return line.matches("\\((.*?)\\)");
  }

  /**
   * Parses a unit count written inside parentheses, e.g. {@code "(3)"} or {@code "(4.5)"}.
   *
   * @param raw text to parse, may be null
   * @return the numeric value with all parentheses removed, or {@code 0} when {@code raw} is null
   *     or what remains is not a valid float
   */
  private float getUnits(String raw) {
    if (raw == null) {
      return 0f;
    }
    // Strings are immutable: the stripped text has to be captured, not discarded.
    String digits = raw.replace("(", "").replace(")", "").trim();
    return NumberUtils.toFloat(digits, 0f);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment