Skip to content

Instantly share code, notes, and snippets.

@Romern
Last active July 16, 2018 08:34
Show Gist options
  • Save Romern/58945b0b0f86b15859a6454a39100eb8 to your computer and use it in GitHub Desktop.
Save Romern/58945b0b0f86b15859a6454a39100eb8 to your computer and use it in GitHub Desktop.
OkusonParser for LAInf18. Requires https://jsoup.org/packages/jsoup-1.11.2.jar in the same directory as the scripts.
# Compile OkusonParser.java with the current directory and jsoup-1.11.2.jar
# (expected in the current directory) on the classpath.
javac -cp .:jsoup-1.11.2.jar OkusonParser.java
#!/bin/bash
# Downloads sheets 1-9 from the LAInf18 QuerySheet endpoint into
# sheet1.html ... sheet9.html in the current directory.
# Prompts for the matriculation number and the password (not echoed).
echo Matrnr:
# -r: keep backslashes literal instead of treating them as escapes.
IFS= read -r matrnr
echo Password:
IFS= read -rs pass
# URL-encode the password. It is fed to Python on stdin rather than being
# spliced into the Python source text, so quotes or backslashes in the
# password can neither break the encoding nor inject code.
pass=$(printf '%s' "$pass" | python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.stdin.read()))')
for i in $(seq 1 9); do
    wget "https://www2.math.rwth-aachen.de/LAInf18/QuerySheet?id=$matrnr&passwd=$pass&format=MathJax&resolution=Standard&sheet=$i" -O "sheet$i.html"
done
exit
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.io.IOException;
import java.nio.file.Files;
import java.util.*;
/**
 * Converts downloaded sheet HTML files into a single LaTeX document that is
 * printed to standard output.
 *
 * <p>Every command-line argument is a folder of sheet files. The same sheet
 * (matched by position after sorting the folder's files) may appear in several
 * folders; questions are merged per sheet so that each distinct question text
 * is emitted only once.
 */
public class OkusonParser {

    /**
     * Entry point: validates the folders, parses every sheet and prints the LaTeX result.
     *
     * @param args one or more sheet folders, each containing the same number of files
     * @throws Exception if a sheet file cannot be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.out.println("Usage: java OkusonParser [Sheet-Folder-1] ... [Sheet-Folder-n]\n\n" +
                    "In each sheet-folder have to be only files named sheetXX.html, and the same amount in each folder.\n" +
                    "Use downloadskript.sh to download them properly instead of through the browser.\n");
            return;
        }
        File init = new File(args[0]);
        if (!init.isDirectory()) {
            System.err.println(args[0] + " is not a directory!");
            return;
        }
        String[] initf = init.list();
        if (initf == null) {
            System.err.println("Error reading " + args[0] + ".");
            return;
        }

        // The first folder defines how many sheets every folder must contain.
        int sheets = initf.length;
        File[][] files = new File[args.length][];
        for (int i = 0; i < args.length; i++) {
            File cur = new File(args[i]);
            if (!cur.isDirectory()) {
                System.err.println(args[i] + " is not a directory!");
                return;
            }
            File[] cure = cur.listFiles();
            if (cure == null) {
                System.err.println("Can't list files on folder " + args[i] + "!");
                return;
            }
            if (cure.length != sheets) {
                System.err.println("Sheet folder \"" + args[i] + "\" has a different amount of files than \"" + args[0] + "\"!");
                return;
            }
            // Sorting makes index j refer to the same sheet in every folder.
            Arrays.sort(cure);
            files[i] = cure;
        }

        // One Blatt per sheet, named after the file in the first folder.
        Blatt[] blaetter = new Blatt[sheets];
        for (int j = 0; j < sheets; j++) {
            blaetter[j] = new Blatt(files[0][j].getName().replace("sheet", "").replace(".html", ""));
        }

        // Merge every folder's version of sheet j into blaetter[j].
        for (int i = 0; i < args.length; i++) {
            for (int j = 0; j < sheets; j++) {
                parseSheet(preProcessRules(readFile(files[i][j])), blaetter[j]);
            }
        }

        System.out.println(postProcessRules(render(blaetter)));
    }

    /**
     * Parses one pre-processed sheet and merges its exercises into {@code blatt}.
     *
     * <p>Elements are dispatched on their CSS class: {@code hidden} text is added
     * to the LaTeX header (once), {@code number}/{@code exnr} selects the current
     * exercise, {@code intro}/{@code extext} sets its text if not yet set,
     * {@code question} registers a sub-question and {@code erg} stores the
     * solution of a newly registered sub-question.
     */
    private static void parseSheet(String html, Blatt blatt) {
        Document doc = Jsoup.parse(html);
        Elements elements = doc.body().select("*");
        // Exercise the following elements belong to; null until a number element was seen.
        Aufgabe current = null;
        String currentQuestion = "";
        boolean newQuestion = true;
        for (Element element : elements) {
            switch (element.className()) {
                case "hidden":
                    String curhead = element.text().replace("\\(", "").replace("\\)", "");
                    if (!header.contains(curhead)) {
                        header += "\n" + curhead + "\n";
                    }
                    break;
                case "number":
                case "exnr":
                    String nummer = element.text();
                    current = blatt.aufgaben.get(nummer);
                    if (current == null) {
                        current = new Aufgabe();
                        blatt.aufgaben.put(nummer, current);
                    }
                    break;
                case "intro":
                case "extext":
                    // Guard: content before the first exercise number has no exercise to attach to.
                    if (current != null && current.aufgabentext == null) {
                        current.aufgabentext = element.text();
                    }
                    break;
                case "question":
                    if (current == null) {
                        break;
                    }
                    if (!current.subaufgaben.containsKey(element.text())) {
                        currentQuestion = element.text();
                        current.subaufgaben.put(currentQuestion, "");
                        newQuestion = true;
                    } else {
                        // Already known question: its solution must not be overwritten.
                        newQuestion = false;
                    }
                    break;
                case "erg":
                    if (current != null && newQuestion) {
                        current.subaufgaben.put(currentQuestion, element.text());
                    }
                    break;
                default:
                    break;
            }
        }
    }

    /** Renders all sheets into the LaTeX document (header, preamble, one section per sheet). */
    private static String render(Blatt[] blaetter) {
        StringBuilder out = new StringBuilder();
        out.append(header);
        out.append(begin);
        for (Blatt b : blaetter) {
            out.append("\\section{Blatt ").append(b.blattnummer).append("}\n");
            for (Map.Entry<String, Aufgabe> aufgabe : b.aufgaben.entrySet()) {
                out.append("\\subsection{Aufgabe ").append(aufgabe.getKey()).append("}\n");
                // An exercise without intro text must not print the literal "null".
                if (aufgabe.getValue().aufgabentext != null) {
                    out.append(aufgabe.getValue().aufgabentext);
                }
                out.append("\n\\begin{itemize}\n");
                for (Map.Entry<String, String> sub : aufgabe.getValue().subaufgaben.entrySet()) {
                    out.append("\n\\item ");
                    out.append(sub.getKey());
                    out.append("\n");
                    out.append("\\newline\\fbox{\\parbox{0.9\\textwidth}{Solution: ")
                            .append(sub.getValue())
                            .append("}}");
                }
                out.append("\n\\end{itemize}\n");
            }
        }
        out.append("\n\\end{document}");
        return out.toString();
    }

    /** One exercise: its optional intro text and its sub-questions mapped to their solutions. */
    public static class Aufgabe {
        /** Intro text of the exercise; stays null if the sheet provides none. */
        String aufgabentext;
        /** Question text mapped to solution text, sorted by question text. */
        Map<String, String> subaufgaben;

        public Aufgabe() {
            subaufgaben = new TreeMap<>();
        }
    }

    /** One sheet: its number (taken from the file name) and its exercises keyed by exercise number. */
    public static class Blatt {
        String blattnummer;
        /** Exercise number mapped to exercise, sorted by the number's string form. */
        Map<String, Aufgabe> aufgaben;

        public Blatt(String bn) {
            blattnummer = bn;
            aufgaben = new TreeMap<>();
        }
    }

    /** Reads the whole file as UTF-8 text. */
    private static String readFile(File file) throws IOException {
        byte[] encoded = Files.readAllBytes(file.toPath());
        return new String(encoded, StandardCharsets.UTF_8);
    }

    /** Fixes up the generated LaTeX: removes a backslash directly after "verbatim}" and drops empty itemize blocks. */
    private static String postProcessRules(String prefinal) {
        return prefinal.replace("verbatim}\\", "verbatim}")
                .replace("\\begin{itemize}\n\n\\end{itemize}", "");
    }

    /**
     * Rewrites the raw sheet HTML before it is parsed: selected HTML tags are
     * replaced by LaTeX equivalents and everything from a {@code %} to the end
     * of the line is removed.
     */
    private static String preProcessRules(String init) {
        return init.replace("</p>\n", "</p>\\\\\n")
                .replace("\\\\%", "\\\\")
                .replace("<code>", "\\verb|")
                .replace("</code>", "|")
                .replace("{};", "{ };")
                .replace("{}", "\\{\\}")
                .replace("<ol>", "\\begin{enumerate}[label=(\\alph*)]")
                .replace("<ul>", "\\begin{enumerate}[label=(\\roman*)]")
                .replace("</ol>", "\\end{enumerate}")
                .replace("</ul>", "\\end{enumerate}")
                .replace("<dt>", "\\textbf{")
                .replace("</dt>", "}")
                .replace("<li>", "\\item ")
                .replace("</li>", "")
                .replace("<em>", "\\textit{")
                .replace("</em>", "}")
                // Unwrap the CDATA section around the macro body. The complete "]]>"
                // terminator is dropped so that no stray "]" remains after "\right)".
                .replace("\\newcommand{\\ExpandedCoefficientMatrix}[2]{<![CDATA[\\left( \\!\\!\\! \\begin{array}{c|c} {#1} & {#2} \\end{array} \\!\\!\\! \\right)]]>}", "\\newcommand{\\ExpandedCoefficientMatrix}[2]{\\left( \\!\\!\\! \\begin{array}{c|c} {#1} & {#2} \\end{array} \\!\\!\\! \\right)}")
                .replace("<p>[", "<p>$[$")
                .replace("]</p>", "$]$</p>")
                .replaceAll("(?m)%.*$", "");
    }

    /** LaTeX preamble; text of "hidden" elements found while parsing is appended to it. */
    private static String header = "\\documentclass[12pt,a4paper]{article}\n"
            + "\\usepackage[utf8]{inputenc}\n"
            + "\\usepackage[german]{babel}\n"
            + "\\usepackage{amsmath}\n"
            + "\\usepackage{amsfonts}\n"
            + "\\usepackage{amssymb}\n"
            + "\\usepackage{amsthm}\n"
            + "\\usepackage{mathtools}\n"
            + "\\usepackage{enumitem}\n"
            + "\\usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}\n"
            + "\\usepackage{hyperref}\n"
            + "\\usepackage{tikz}\n"
            + "\\usepackage{pb-diagram}\n"
            + "\\usetikzlibrary{arrows.meta,positioning,calc}\n"
            + "\\tikzset{graph node/.style={circle,fill=black,draw,minimum size=5pt,inner sep=0pt},node distance=1.5cm and 1.5cm,graph/.style={ }}\n"
            + "\\newcommand{\\n}{~\\\\}\n";

    /** Title block and document start, emitted right after the header. */
    private static final String begin = "\n\\title{TITEL}\n"
            + "\\author{der boy der g, sick wie leukaemie}\n"
            + "\\begin{document}\n"
            + "\\maketitle\n"
            + "\\setcounter{secnumdepth}{0}\n";
}
# Run OkusonParser with jsoup on the classpath, forwarding all arguments.
# "$@" is quoted so that folder names containing spaces stay single arguments.
java -cp .:jsoup-1.11.2.jar OkusonParser "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment