Skip to content

Instantly share code, notes, and snippets.

@Romern
Created June 4, 2021 17:03
Show Gist options
  • Save Romern/3ac8db103abaed2a73c707a55bd78186 to your computer and use it in GitHub Desktop.
Save Romern/3ac8db103abaed2a73c707a55bd78186 to your computer and use it in GitHub Desktop.
old moodle parser
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Converts saved Moodle quiz pages into one LaTeX document that is printed to standard output.
 *
 * <p>Every command-line argument names a folder containing a {@code sheet.html} file together
 * with the images referenced by that page. The folder name must contain {@code sheet} followed
 * by the sheet number, for example {@code sheet3}.
 */
public class MoodleParser {

    /** LaTeX preamble: document class, packages and the {@code showsolution} switch. */
    private static final String HEADER = "\\documentclass[12pt,a4paper]{article}\n"
            + "\\usepackage[utf8]{inputenc}\n"
            + "\\usepackage[german]{babel}\n"
            + "\\usepackage{amsmath}\n"
            + "\\usepackage{amsfonts}\n"
            + "\\usepackage{amssymb}\n"
            + "\\usepackage{amsthm}\n"
            + "\\usepackage{mathtools}\n"
            + "\\usepackage{enumitem}\n"
            + "\\usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}\n"
            + "\\usepackage{hyperref}\n"
            + "\\usepackage{tikz}\n"
            + "\\usepackage{ifthen}\n"
            + "\\usetikzlibrary{arrows.meta,positioning,calc}\n"
            + "\\tikzset{graph node/.style={circle,fill=black,draw,minimum size=5pt,inner sep=0pt},node distance=1.5cm and 1.5cm,graph/.style={ }}\n"
            + "\\newcommand{\\n}{~\\\\}\n"
            + "\\newboolean{showsolutions}\n"
            + "\\setboolean{showsolutions}{false}\n"
            + "\\newcommand{\\showsolution}[1]{\\ifthenelse{\\boolean{showsolutions}}{#1}{ }}\n";

    /** Title block and the opening of the document body. */
    private static final String BEGIN = "\n\\title{TITEL}\n"
            + "\\author{der boy der g, sick wie leukaemie}\n"
            + "\\begin{document}\n"
            + "\\maketitle\n"
            + "\\setcounter{secnumdepth}{0}\n";

    /**
     * Program entry point.
     *
     * @param args one or more sheet folders, each holding a {@code sheet.html} and its images
     * @throws Exception if a {@code sheet.html} cannot be read or a folder name or question
     *     number is not numeric
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.out.println("Usage: java MoodleParser [Sheet-Folder-1] ... [Sheet-Folder-n]\n\n"
                    + "In each sheet-folder has to be a sheet.html as well as all pictures on the page.\n");
            return;
        }

        // Validate every folder up front, not just the first one.
        for (String folder : args) {
            File dir = new File(folder);
            if (!dir.isDirectory()) {
                System.err.println(folder + " is not a directory!");
                return;
            }
            if (dir.list() == null) {
                System.err.println("Error reading " + folder + ".");
                return;
            }
        }

        // Parse each sheet into its own Blatt; questions are keyed by number, so a repeated
        // question number on one sheet is merged into a single Aufgabe.
        Blatt[] blaetter = new Blatt[args.length];
        for (int i = 0; i < args.length; i++) {
            Blatt blatt = new Blatt(sheetNumber(args[i]));
            File sheetFile = new File(args[i] + "/sheet.html");
            Document doc = Jsoup.parse(preprocess(readFile(sheetFile)));
            parseSheet(doc, blatt, sheetFile.getParent());
            blaetter[i] = blatt;
        }

        // The preamble declares utf8 input encoding, so emit UTF-8 regardless of the platform
        // default charset. The stream wraps System.out and is deliberately not closed.
        PrintStream utf8Out = new PrintStream(System.out, true, StandardCharsets.UTF_8.name());
        utf8Out.println(postProcessRules(toLatex(blaetter)));
        utf8Out.flush();
    }

    /**
     * Extracts the sheet number text from a folder argument: the part that follows the first
     * occurrence of {@code sheet}, with trailing path separators removed so that a
     * tab-completed argument such as {@code sheet3/} works.
     *
     * @param folder folder argument as given on the command line
     * @return the text expected to hold the sheet number
     * @throws IllegalArgumentException if nothing follows {@code sheet} in the name
     */
    private static String sheetNumber(String folder) {
        String[] parts = folder.split("sheet");
        if (parts.length < 2) {
            throw new IllegalArgumentException(
                    "Cannot derive a sheet number from '" + folder + "': expected a name like sheet3");
        }
        return parts[1].replaceAll("[/\\\\]+$", "");
    }

    /**
     * Walks all elements of one quiz page and collects question text, answer options and
     * solutions into {@code blatt}. Elements are dispatched on their exact class attribute.
     *
     * @param doc parsed quiz page
     * @param blatt sheet that receives the questions
     * @param imageDir folder prefix used for image paths in the generated LaTeX
     */
    private static void parseSheet(Document doc, Blatt blatt, String imageDir) {
        Aufgabe current = null;
        for (Element element : doc.body().select("*")) {
            String cssClass = element.className();

            if (cssClass.equals("qno")) {
                Integer number = Integer.parseInt(element.text());
                if (!blatt.aufgaben.containsKey(number)) {
                    blatt.aufgaben.put(number, new Aufgabe());
                }
                current = blatt.aufgaben.get(number);
                continue;
            }
            if (current == null) {
                // Content in front of the first question number cannot be attributed to any
                // question, so it is skipped instead of failing on an empty question number.
                continue;
            }

            switch (cssClass) {
                case "filter_mathjaxloader_equation":
                    // TODO: flagged "FIX THIS" by the original author; the intended fix is not recorded.
                    current.aufgabentext += element.wholeText();
                    break;
                case "qtext": {
                    StringBuilder text = new StringBuilder(element.ownText());
                    for (Element child : element.children()) {
                        text.append(child.text());
                        if (child.tag().getName().equals("p")) {
                            // A paragraph whose first child is an image becomes its own line.
                            Element first = child.children().first();
                            if (first != null && first.tag().getName().equals("img")) {
                                text.append("\\\\")
                                        .append(includeGraphics(first, imageDir, true))
                                        .append("\\\\");
                            }
                        }
                    }
                    current.aufgabentext += text.toString();
                    break;
                }
                case "prompt":
                    current.aufgabentext += "\n\n" + element.text();
                    break;
                case "droparea": {
                    Element image = element.children().select("img").first();
                    if (image != null) {
                        current.aufgabentext += "\n\n" + includeGraphics(image, imageDir, false);
                    }
                    break;
                }
                case "dragitemgroup1":
                    for (Element item : element.children()) {
                        current.antwortmoeglichkeiten.add(replacementMathStuff(item.text()));
                        current.answernumber.add("");
                    }
                    break;
                case "answer":
                    for (Element row : element.children().select("tr")) {
                        Element cell = row.children().select("td").first();
                        Element dropdown = row.children().select("select").first();
                        if (cell == null || dropdown == null) {
                            // Rows without a text cell or a dropdown carry no answer options.
                            continue;
                        }
                        StringBuilder entry = new StringBuilder(cell.text())
                                .append(":")
                                .append("\\begin{itemize}\n");
                        for (Element option : dropdown.children()) {
                            // Skip the placeholder entry of the dropdown.
                            if (!option.text().equals("Auswählen...")) {
                                entry.append("\n\\item ").append(option.text());
                            }
                        }
                        entry.append("\n\\end{itemize}");
                        current.antwortmoeglichkeiten.add(entry.toString());
                        current.answernumber.add("");
                    }
                    break;
                case "rightanswer": {
                    StringBuilder solution = new StringBuilder();
                    Elements feedback = element.parent().children().select("div.generalfeedback");
                    if (!feedback.isEmpty()) {
                        solution.append(feedback.text());
                        Element feedbackImage = feedback.first().children().select("img").first();
                        if (feedbackImage != null) {
                            solution.append(includeGraphics(feedbackImage, imageDir, true));
                        }
                    }
                    Element image = element.children().select("img").first();
                    if (image != null) {
                        solution.append(includeGraphics(image, imageDir, true));
                    } else {
                        solution.append(element.ownText());
                    }
                    current.solution += solution.toString();
                    break;
                }
                case "ml-1": {
                    Element image = element.children().select("img").first();
                    String answer = image != null
                            ? includeGraphics(image, imageDir, true)
                            : element.ownText();
                    Element label = element.children().select("span").first();
                    current.antwortmoeglichkeiten.add(answer);
                    // Without a label span the option is rendered as a plain item.
                    current.answernumber.add(label != null ? label.text() : "");
                    break;
                }
                default:
                    break;
            }
        }
    }

    /**
     * Builds the {@code \includegraphics} command for an image element. Only the last path
     * segment of the {@code src} attribute is used, since the pictures are expected to lie
     * next to {@code sheet.html}.
     *
     * @param image the {@code img} element
     * @param imageDir folder prefix for the image path
     * @param halveWidth whether to add a width option of half the HTML {@code width} value in
     *     points; the option is omitted when the attribute is missing or not numeric
     * @return the LaTeX command
     */
    private static String includeGraphics(Element image, String imageDir, boolean halveWidth) {
        Attributes attributes = image.attributes();
        String[] segments = attributes.get("src").split("/");
        String fileName = segments.length == 0 ? "" : segments[segments.length - 1];

        String options = "";
        if (halveWidth) {
            try {
                int width = Integer.parseInt(attributes.get("width")) / 2;
                options = "[width=" + width + "pt]";
            } catch (NumberFormatException ignored) {
                // No usable width attribute: include the picture at its natural size.
            }
        }
        return "\\includegraphics" + options + "{" + imageDir + "/" + fileName + "}";
    }

    /**
     * Renders all sheets as a complete LaTeX document, before post-processing.
     *
     * @param blaetter the parsed sheets, in output order
     * @return LaTeX source from the preamble up to the end of the document
     */
    private static String toLatex(Blatt[] blaetter) {
        StringBuilder out = new StringBuilder(HEADER).append(BEGIN);
        for (Blatt blatt : blaetter) {
            out.append("\\section{Blatt ").append(blatt.blattnummer).append("}\n");
            for (Map.Entry<Integer, Aufgabe> entry : blatt.aufgaben.entrySet()) {
                Aufgabe aufgabe = entry.getValue();
                out.append("\n\n\\subsection{Aufgabe ").append(entry.getKey()).append("}\n");
                out.append(replacementStuff(aufgabe.aufgabentext));

                if (!aufgabe.antwortmoeglichkeiten.isEmpty()) {
                    out.append("\n\\begin{itemize}\n");
                    for (int i = 0; i < aufgabe.antwortmoeglichkeiten.size(); i++) {
                        String label = aufgabe.answernumber.get(i);
                        if (label.equals("")) {
                            out.append("\n\\item ");
                        } else {
                            out.append("\n\\item[").append(label).append("]");
                        }
                        out.append(replacementStuff(aufgabe.antwortmoeglichkeiten.get(i)));
                        out.append("\n");
                    }
                    out.append("\n\\end{itemize}\n");
                }

                // The solution is escaped like every other text: a raw percent sign would
                // otherwise comment out the closing braces of the box.
                out.append("~\\newline\\showsolution{\\fbox{\\parbox{0.9\\textwidth}{Solution: ")
                        .append(replacementStuff(aufgabe.solution))
                        .append("}}}");
            }
        }
        out.append("\n\\end{document}");
        return out.toString();
    }

    /** One question: its text, the answer options with their labels, and the solution. */
    public static class Aufgabe {
        /** Question text, already containing LaTeX fragments such as image includes. */
        String aufgabentext;
        /** Answer options in page order. */
        List<String> antwortmoeglichkeiten;
        /** Label for each answer option, parallel to the options; empty means no label. */
        List<String> answernumber;
        /** Solution text. */
        String solution;

        public Aufgabe() {
            antwortmoeglichkeiten = new ArrayList<>();
            answernumber = new ArrayList<>();
            solution = "";
            aufgabentext = "";
        }
    }

    /** One sheet: its number and its questions, sorted by question number. */
    public static class Blatt {
        Integer blattnummer;
        Map<Integer, Aufgabe> aufgaben;

        /**
         * @param bn the sheet number as decimal text
         * @throws NumberFormatException if {@code bn} is not an integer
         */
        public Blatt(String bn) {
            blattnummer = Integer.parseInt(bn);
            aufgaben = new TreeMap<>();
        }
    }

    /** Reads a whole file as UTF-8 text. */
    private static String readFile(File file) throws IOException {
        byte[] encoded = Files.readAllBytes(file.toPath());
        return new String(encoded, StandardCharsets.UTF_8);
    }

    /**
     * Final clean-up of the generated LaTeX: turns {@code =>} into an implication arrow and
     * double quotes into two apostrophes. The math delimiters \( and \) are left as they are.
     */
    private static String postProcessRules(String prefinal) {
        return prefinal.replace("=>", "$\\Rightarrow$")
                .replace("\"", "''");
    }

    /** Escapes the LaTeX special characters {@code &} and {@code %}. */
    private static String replacementStuff(String s) {
        return s.replace("&", "\\&").replace("%", "\\%");
    }

    /** Replaces literal {@code <br>} tags in the raw HTML with a LaTeX line break. */
    private static String preprocess(String s) {
        return s.replace("<br>", "~\\\\");
    }

    /** Wraps text that contains an underscore in inline math so the subscript typesets. */
    private static String replacementMathStuff(String s) {
        if (s.contains("_")) {
            s = "$" + s + "$";
        }
        return s;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment