Skip to content

Instantly share code, notes, and snippets.

@jmini
Created November 27, 2014 05:17
Show Gist options
  • Save jmini/5f3e99c897c165fa2219 to your computer and use it in GitHub Desktop.
Save jmini/5f3e99c897c165fa2219 to your computer and use it in GitHub Desktop.
Create toc.xml with mylyn-intent from existing HTML files
/*******************************************************************************
* Copyright (c) 2014 BSI Business Systems Integration AG.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* BSI Business Systems Integration AG - initial API and implementation
******************************************************************************/
package buildhelp;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.eclipse.mylyn.wikitext.core.parser.outline.OutlineItem;
import org.eclipse.mylyn.wikitext.core.parser.util.MarkupToEclipseToc;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
/**
 * Builds an Eclipse help table of contents ({@code toc.xml}) from existing HTML files.
 * <p>
 * Every HTML heading ({@code h1} to {@code h6}) found in the input files becomes an {@link OutlineItemEx} node. The
 * nesting of the nodes follows the heading levels, and each node remembers the file it was found in so that the
 * generated toc can point to the correct page.
 */
public class BuildHelpTocMain {

    /**
     * Compute the outline tree for several files: the children nodes are added to the rootNode.
     * <p>
     * The same level map is shared by all files, so the headings of a file are attached below the last known
     * headings of the previous files.
     *
     * @param rootNode
     *          the root node containing the outline
     * @param files
     *          list of files that needs to be parsed (expected to be located below rootFolder)
     * @param rootFolder
     *          the folder containing all the files (where the xml toc file will be written)
     * @throws IOException
     *           if one of the files cannot be read
     */
    private static void computeTocFile(OutlineItemEx rootNode, List<File> files, File rootFolder) throws IOException {
        Map<Integer, OutlineItemEx> nodeMap = new HashMap<Integer, OutlineItemEx>();
        putNode(nodeMap, rootNode, rootNode.getLevel());

        // The prefix is the same for every file: compute it once.
        String rootPath = rootFolder.getAbsolutePath();
        for (File file : files) {
            String html = Files.toString(file, Charsets.UTF_8);
            // Path relative to the root folder ("+ 1" skips the separator), always written with forward slashes.
            String filePath = file.getAbsolutePath().substring(rootPath.length() + 1).replace('\\', '/');
            Document doc = Jsoup.parse(html);
            computeOutlineNodes(nodeMap, doc, filePath);
        }
    }

    /**
     * Write the outline as Eclipse toc file.
     *
     * @param rootNode
     *          the root node containing the outline; its label is used as book title and its file path as main
     *          html file
     * @param tocFile
     *          the file where the toc content is written
     * @throws IOException
     *           if the toc file cannot be written
     */
    static void writeTocFile(OutlineItemEx rootNode, File tocFile) throws IOException {
        MarkupToEclipseToc eclipseToc = new MarkupToEclipseToc() {
            @Override
            protected String computeFile(OutlineItem item) {
                // Each node knows its own file: use it instead of the single html file of the toc generator.
                if (item instanceof OutlineItemEx && ((OutlineItemEx) item).getFilePath() != null) {
                    return ((OutlineItemEx) item).getFilePath();
                }
                return super.computeFile(item);
            }
        };
        eclipseToc.setBookTitle(rootNode.getLabel());
        eclipseToc.setHtmlFile(rootNode.getFilePath());
        String tocContent = eclipseToc.createToc(rootNode);
        Files.write(tocContent, tocFile, Charsets.UTF_8);
    }

    /**
     * Compute the outline tree for one document: one node is created for each heading (h1 to h6) of the document, in
     * document order.
     * <p>
     * A heading without id is reported on {@code System.err}; the node is nevertheless created (with a
     * {@code null} id).
     *
     * @param nodeMap
     *          the map containing the last known node for each level
     * @param doc
     *          the html content as JSoup document
     * @param filePath
     *          path to the file corresponding to the document
     */
    static void computeOutlineNodes(Map<Integer, OutlineItemEx> nodeMap, Document doc, String filePath) {
        Elements elements = doc.getAllElements();
        for (Element element : elements) {
            String tagName = element.nodeName();
            if (!tagName.matches("h[1-6]")) {
                continue;
            }
            String title = element.text();
            String id = findId(element);
            if (id == null) {
                System.err.println("id is not found for node " + tagName + " '" + title + "'");
            }
            // "h3" -> 3
            int level = Integer.parseInt(tagName.substring(1));
            OutlineItem parent = findParent(nodeMap, level);
            // The node is expected to be registered as child of its parent by the OutlineItem constructor.
            OutlineItemEx node = new OutlineItemEx(parent, level, id, 0, 0, title);
            node.setFilePath(filePath);
            putNode(nodeMap, node, level);
        }
    }

    /**
     * Put the node in the map as last known node for the given level.
     * <p>
     * Nodes registered for deeper levels belong to the subtree of the node that is replaced. They are removed from
     * the map, otherwise a later heading that skips a level (for example a h3 directly after a new h1) would be
     * attached to a stale node of the previous subtree.
     *
     * @param nodeMap
     *          the map containing the last known node for each level
     * @param node
     *          the node that needs to be added
     * @param level
     *          the level (1 for h1, 2 for h2 ...)
     */
    static void putNode(Map<Integer, OutlineItemEx> nodeMap, OutlineItemEx node, int level) {
        // Iterate over a snapshot of the keys: entries are removed from the map inside the loop.
        for (Integer knownLevel : nodeMap.keySet().toArray(new Integer[0])) {
            if (knownLevel != null && knownLevel.intValue() > level) {
                nodeMap.remove(knownLevel);
            }
        }
        nodeMap.put(level, node);
    }

    /**
     * Find the parent node given a specific level: the node registered for the closest level strictly lower than
     * the given one.
     *
     * @param nodeMap
     *          the map containing the last known node for each level
     * @param level
     *          the level of the current node
     * @return parentNode, or <code>null</code> if no node is registered for a lower level
     */
    static OutlineItem findParent(Map<Integer, OutlineItemEx> nodeMap, int level) {
        int candidate = level - 1;
        // Walk up until a level with a known node is reached; level 0 is the last one that is checked.
        while (candidate > 0 && nodeMap.get(candidate) == null) {
            candidate = candidate - 1;
        }
        return nodeMap.get(candidate);
    }

    /**
     * Find the id of a header tag. id is defined as id attribute of the header, or as id attribute of a nested "a" tag
     * (the first "a" tag having a non-empty id wins).
     *
     * @param element
     *          element corresponding to the HTML header tag (h1, h2, h3, h4, h5 or h6)
     * @return id, or <code>null</code> if neither the header nor one of its "a" tags defines one
     */
    static String findId(Element element) {
        String id = findIdForElement(element);
        if (id != null) {
            return id;
        }
        for (Element anchor : element.getElementsByTag("a")) {
            id = findIdForElement(anchor);
            if (id != null) {
                return id;
            }
        }
        return null;
    }

    /**
     * @param element
     *          any element
     * @return the id attribute of the element, or <code>null</code> if it is missing or empty
     */
    private static String findIdForElement(Element element) {
        String id = element.id();
        if (id != null && id.length() > 0) {
            return id;
        }
        return null;
    }

    /**
     * Entry point: creates <code>toc.xml</code> in the <code>../scouthelp</code> folder from the
     * <code>scout_helpli1.html</code> ... <code>scout_helpli5.html</code> files of its <code>html</code> sub-folder.
     *
     * @param args
     *          not used
     * @throws IOException
     *           if a file cannot be read or the toc file cannot be written
     */
    public static void main(String[] args) throws IOException {
        OutlineItemEx root = new OutlineItemEx(null, 0, "id", 0, 0, "Eclipse Scout User Guide");
        root.setFilePath("scout_help.html");

        File rootFolder = new File("../scouthelp");
        File htmlFolder = new File(rootFolder, "html");
        List<File> files = Arrays.asList(
                new File(htmlFolder, "scout_helpli1.html"),
                new File(htmlFolder, "scout_helpli2.html"),
                new File(htmlFolder, "scout_helpli3.html"),
                new File(htmlFolder, "scout_helpli4.html"),
                new File(htmlFolder, "scout_helpli5.html"));

        computeTocFile(root, files, rootFolder);
        writeTocFile(root, new File(rootFolder, "toc.xml"));
    }
}
/*******************************************************************************
* Copyright (c) 2014 BSI Business Systems Integration AG.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* BSI Business Systems Integration AG - initial API and implementation
******************************************************************************/
package buildhelp;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.util.HashMap;
import java.util.Map;
import org.eclipse.mylyn.wikitext.core.parser.outline.OutlineItem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.junit.Test;
/**
 * Unit test for {@link BuildHelpTocMain}: covers the level map handling ({@code putNode}, {@code findParent}), the
 * creation of the outline from an HTML document ({@code computeOutlineNodes}) and the id lookup ({@code findId}).
 */
public class BuildHelpTocTest {
    // Fixture: ROOT -> (H1 - A -> (H2 -> H3), H1 - B). Created for each test instance.
    OutlineItemEx ROOT = new OutlineItemEx(null, 0, "#x0-1000", 0, 0, "ROOT");
    OutlineItemEx NODE_1A = new OutlineItemEx(ROOT, 1, "#x1-1000", 0, 0, "H1 - A");
    OutlineItemEx NODE_1B = new OutlineItemEx(ROOT, 1, "#x2-1000", 0, 0, "H1 - B");
    OutlineItemEx NODE_2 = new OutlineItemEx(NODE_1A, 2, "#x3-1000", 0, 0, "H2");
    OutlineItemEx NODE_3 = new OutlineItemEx(NODE_2, 3, "#x4-1000", 0, 0, "H3");

    /**
     * Test method for {@link BuildHelpTocMain#putNode(java.util.Map, OutlineItemEx, int)}.
     */
    @Test
    public void testPutNode() {
        Map<Integer, OutlineItemEx> map = new HashMap<Integer, OutlineItemEx>();
        assertNull(map.get(0));
        assertNull(map.get(1));
        assertNull(map.get(2));
        assertNull(map.get(3));
        assertNull(map.get(4));

        // first node on level 1
        BuildHelpTocMain.putNode(map, NODE_1A, 1);
        assertNull(map.get(0));
        assertEquals(NODE_1A, map.get(1));
        assertNull(map.get(2));
        assertNull(map.get(3));
        assertNull(map.get(4));

        // a second node on the same level replaces the first one
        BuildHelpTocMain.putNode(map, NODE_1B, 1);
        assertNull(map.get(0));
        assertEquals(NODE_1B, map.get(1));
        assertNull(map.get(2));
        assertNull(map.get(3));
        assertNull(map.get(4));

        // a deeper node does not touch the other levels
        BuildHelpTocMain.putNode(map, NODE_3, 3);
        assertNull(map.get(0));
        assertEquals(NODE_1B, map.get(1));
        assertNull(map.get(2));
        assertEquals(NODE_3, map.get(3));
        assertNull(map.get(4));
    }

    /**
     * Test method for {@link BuildHelpTocMain#findParent(java.util.Map, int)}.
     */
    @Test
    public void testFindParent() {
        Map<Integer, OutlineItemEx> map = new HashMap<Integer, OutlineItemEx>();

        // empty map: no parent
        assertNull(BuildHelpTocMain.findParent(map, 3));
        assertNull(BuildHelpTocMain.findParent(map, 2));

        // only the root is known: it is the parent for every level
        BuildHelpTocMain.putNode(map, ROOT, 0);
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 1));
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 2));
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 3));

        // a node on level 3 is never the parent of a node on level 1, 2 or 3
        BuildHelpTocMain.putNode(map, NODE_3, 3);
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 1));
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 2));
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 3));

        // a node on level 1 is the parent for the levels 2 and 3
        BuildHelpTocMain.putNode(map, NODE_1A, 1);
        assertEquals(ROOT, BuildHelpTocMain.findParent(map, 1));
        assertEquals(NODE_1A, BuildHelpTocMain.findParent(map, 2));
        assertEquals(NODE_1A, BuildHelpTocMain.findParent(map, 3));
    }

    /**
     * Test method for
     * {@link BuildHelpTocMain#computeOutlineNodes(java.util.Map, org.jsoup.nodes.Document, String)}.
     */
    @Test
    public void testComputeOutlineNodes() throws Exception {
        OutlineItemEx root = new OutlineItemEx(null, 0, "z9999", 0, 0, "root node");
        Map<Integer, OutlineItemEx> map = new HashMap<Integer, OutlineItemEx>();
        BuildHelpTocMain.putNode(map, root, 0);

        StringBuilder sb = new StringBuilder();
        sb.append("<html>");
        sb.append("<header>");
        sb.append("<title>Lorem</title>");
        sb.append("</header>");
        sb.append("<body>");
        sb.append("<h1 id=\"n100\">Lorem</h1>");
        sb.append("<h3 id=\"n101\">Utos lorem</h3>");
        sb.append("<h3 id=\"n102\">Tardis lorem</h3>");
        sb.append("<h3 id=\"n103\">Satis lorem</h3>");
        sb.append("<h1 id=\"n200\">Ipsum</h1>");
        sb.append("<h2><a id=\"n201\"/>Dolore ipsum</h2>");
        sb.append("</body>");
        sb.append("</html>");
        Document doc = Jsoup.parse(sb.toString());

        BuildHelpTocMain.computeOutlineNodes(map, doc, "page1.html");

        assertEquals("root children size", 2, root.getChildren().size());

        // first h1: the h3 headings are attached directly to it (there is no h2 in between)
        OutlineItem n100 = root.getChildren().get(0);
        assertEquals("n100 label", "Lorem", n100.getLabel());
        assertEquals("n100 id", "n100", n100.getId());
        assertEquals("n100 children size", 3, n100.getChildren().size());

        OutlineItem n101 = n100.getChildren().get(0);
        assertEquals("n101 label", "Utos lorem", n101.getLabel());
        assertEquals("n101 id", "n101", n101.getId());
        assertEquals("n101 children size", 0, n101.getChildren().size());

        OutlineItem n102 = n100.getChildren().get(1);
        assertEquals("n102 label", "Tardis lorem", n102.getLabel());
        assertEquals("n102 id", "n102", n102.getId());
        assertEquals("n102 children size", 0, n102.getChildren().size());

        OutlineItem n103 = n100.getChildren().get(2);
        assertEquals("n103 label", "Satis lorem", n103.getLabel());
        assertEquals("n103 id", "n103", n103.getId());
        assertEquals("n103 children size", 0, n103.getChildren().size());

        // second h1: its h2 takes the id from the nested "a" tag
        OutlineItem n200 = root.getChildren().get(1);
        assertEquals("n200 label", "Ipsum", n200.getLabel());
        assertEquals("n200 id", "n200", n200.getId());
        assertEquals("n200 children size", 1, n200.getChildren().size());

        OutlineItem n201 = n200.getChildren().get(0);
        assertEquals("n201 label", "Dolore ipsum", n201.getLabel());
        assertEquals("n201 id", "n201", n201.getId());
        assertEquals("n201 children size", 0, n201.getChildren().size());
    }

    /**
     * Test method for {@link BuildHelpTocMain#findId(org.jsoup.nodes.Element)}.
     */
    @Test
    public void testFindId() {
        Element e, c1, c2;

        // header without any attribute
        e = new Element(Tag.valueOf("h1"), "");
        assertNull(BuildHelpTocMain.findId(e));

        // header with an attribute that is not the id
        e = new Element(Tag.valueOf("h3"), "");
        e.attr("class", "test-class");
        assertNull(BuildHelpTocMain.findId(e));

        // id on the header
        e = new Element(Tag.valueOf("h2"), "");
        e.attr("id", "test-id");
        assertEquals("test-id", BuildHelpTocMain.findId(e));

        // id on a nested "a" tag
        e = new Element(Tag.valueOf("h3"), "");
        c1 = new Element(Tag.valueOf("a"), "");
        c1.attr("id", "my-id");
        e.appendChild(c1);
        assertEquals("my-id", BuildHelpTocMain.findId(e));

        // first "a" tag without id: the id of the second one is used
        e = new Element(Tag.valueOf("h3"), "");
        c1 = new Element(Tag.valueOf("a"), "");
        e.appendChild(c1);
        c2 = new Element(Tag.valueOf("a"), "");
        c2.attr("id", "a-id");
        e.appendChild(c2);
        assertEquals("a-id", BuildHelpTocMain.findId(e));

        // two "a" tags with id: the first one wins
        e = new Element(Tag.valueOf("h3"), "");
        c1 = new Element(Tag.valueOf("a"), "");
        c1.attr("id", "my-id");
        e.appendChild(c1);
        c2 = new Element(Tag.valueOf("a"), "");
        c2.attr("id", "a-id");
        e.appendChild(c2);
        assertEquals("my-id", BuildHelpTocMain.findId(e));

        // id on the header and on a nested "a" tag: the header wins
        e = new Element(Tag.valueOf("h3"), "");
        e.attr("id", "test-id");
        c1 = new Element(Tag.valueOf("a"), "");
        e.appendChild(c1);
        c2 = new Element(Tag.valueOf("a"), "");
        c2.attr("id", "a-id");
        e.appendChild(c2);
        assertEquals("test-id", BuildHelpTocMain.findId(e));
    }
}
@ashikur2146
Copy link

Hello, I have created an OutlineItemEx class but I am still getting errors. How can I fix it? Any ideas? Thanks in advance.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment