Skip to content

Instantly share code, notes, and snippets.

@chaoxu
Created July 11, 2011 07:33
Show Gist options
  • Save chaoxu/1075430 to your computer and use it in GitHub Desktop.
Save chaoxu/1075430 to your computer and use it in GitHub Desktop.
LaTeX2HTML Java version
import java.util.HashMap;
/**
 * A registry of named integer counters, pre-populated with a fixed set of
 * LaTeX-style counter names (sectioning, footnotes, floats, enumerations).
 */
public class Counter {
    /** Current value of every known counter, keyed by counter name. */
    public HashMap<String, Integer> counters;

    /** Creates the registry and registers the predefined counters at zero. */
    Counter() {
        counters = new HashMap<String, Integer>();
        final String[] predefined = {
            "part", "chapter", "section", "subsection", "subsubsection",
            "footnote", "table", "equation", "mpfootnote", "subparagraph",
            "page", "figure", "enumi", "enumii", "enumiii", "enumiv"
        };
        for (String name : predefined) {
            newCounter(name);
        }
    }

    /**
     * Adds {@code value} to a counter. A counter that does not exist yet is
     * created holding exactly {@code value}.
     *
     * @param counter counter name
     * @param value   amount to add (may be negative)
     */
    public void addToCounter(String counter, int value) {
        // An unknown counter behaves as if it currently held zero.
        int current = isCounter(counter) ? counters.get(counter) : 0;
        setCounter(counter, current + value);
    }

    /**
     * Reads a counter.
     *
     * @param counter counter name
     * @return the stored value, or 0 when no such counter exists
     */
    public int value(String counter) {
        return isCounter(counter) ? counters.get(counter) : 0;
    }

    /**
     * Stores {@code value} under {@code counter}, creating or overwriting it.
     *
     * @param counter counter name
     * @param value   new value
     */
    public void setCounter(String counter, int value) {
        counters.put(counter, value);
    }

    /**
     * Registers a counter at zero. An already existing counter keeps its
     * current value.
     *
     * @param counter counter name
     */
    public void newCounter(String counter) {
        if (isCounter(counter)) {
            return;
        }
        setCounter(counter, 0);
    }

    /**
     * @param counter counter name
     * @return {@code true} when a counter with this name is registered
     */
    public boolean isCounter(String counter) {
        return counters.containsKey(counter);
    }
}
import java.util.*;
class Token{
String token;
int type;
Token (String s, int t){
token = s;
type = t;
}
public boolean isCommand(){
if(type==1) return true;
return false;
}
public boolean isCommand(String s){
if(type==1 && token.equals(s)) return true;
return false;
}
public boolean isConstant(){
if(type==0) return true;
return false;
}
public boolean isConstant(String s){
if(isConstant() && token.equals(s)) return true;
return false;
}
public boolean equals(Object t){
Token t2 = (Token) t;
if(t2.token.equals(token)&&t2.type==type){
return true;
}
return false;
}
public String value(){
return token;
}
public String toString(){
return token+":"+type;
}
}
/**
 * Turns a stack of characters into a stack of {@link Token}s.
 *
 * <p>Token kinds produced:
 * <ul>
 *   <li>type 1 (command): the name following a backslash; each of the
 *       characters <code>{ } [ ]</code>; <code>$</code> and <code>$$</code>.
 *       A backslash followed by <code>[</code> or <code>]</code> is emitted
 *       as a <code>$$</code> command.</li>
 *   <li>type 0 (constant): every other run of characters.</li>
 * </ul>
 */
public class Lexer {
    /** Type tag given to command tokens. */
    private static final int COMMAND = 1;
    /** Type tag given to constant-text tokens. */
    private static final int CONSTANT = 0;

    /** Remaining input; the top of the stack is the next character to read. */
    Stack<Character> tex;
    /** Lexed tokens; once {@link #lex()} returns, the top is the first token of the input. */
    public Stack<Token> t;

    /**
     * Lexes the whole input immediately.
     *
     * @param z input characters, first character on top; emptied by this constructor
     */
    Lexer(Stack<Character> z) {
        tex = z;
        t = new Stack<Token>();
        lex();
    }

    /**
     * Consumes every character left in {@link #tex}, appends the resulting
     * tokens to {@link #t}, then replaces {@link #t} with a reversed copy so
     * that popping yields tokens in input order.
     */
    public void lex() {
        while (!tex.empty()) {
            char c = tex.pop();
            if (c == '\\') {
                t.push(new Token(readCommand(), COMMAND));
            } else if (c == '{' || c == '[' || c == ']' || c == '}') {
                t.push(new Token(String.valueOf(c), COMMAND));
            } else if (c == '$') {
                // Guard the look-ahead: a '$' may be the very last character.
                if (!tex.empty() && tex.peek() == '$') {
                    tex.pop();
                    t.push(new Token("$$", COMMAND));
                } else {
                    t.push(new Token("$", COMMAND));
                }
            } else {
                t.push(new Token(c + readConstant(), CONSTANT));
            }
        }
        // Tokens were pushed in reading order; flip them so the first one ends up on top.
        Stack<Token> reversed = new Stack<Token>();
        for (int i = t.size() - 1; i >= 0; i--) {
            reversed.push(t.get(i));
        }
        t = reversed;
    }

    /**
     * Reads the command name that follows a backslash.
     *
     * @return {@code "$$"} when the next character is a square bracket (which
     *         is consumed); otherwise the longest run of ASCII letters, digits
     *         and {@code '*'}, which may be empty
     */
    private String readCommand() {
        if (tex.empty()) {
            return "";
        }
        char c = tex.peek();
        if (c == '[' || c == ']') {
            tex.pop();
            return "$$";
        }
        StringBuilder name = new StringBuilder();
        while (isCommandChar(c)) {
            name.append(c);
            tex.pop();
            if (tex.empty()) {
                break;
            }
            c = tex.peek();
        }
        return name.toString();
    }

    /**
     * Reads plain text up to, but not including, the next special character
     * (one of <code>{ } \ [ ] $</code>) or the end of input.
     *
     * @return the text read, possibly empty
     */
    private String readConstant() {
        if (tex.empty()) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        char c = tex.peek();
        while (!isSpecial(c)) {
            text.append(c);
            tex.pop();
            if (tex.empty()) {
                break;
            }
            c = tex.peek();
        }
        return text.toString();
    }

    /** @return whether {@code c} may appear in a command name: {@code *}, 0-9, A-Z or a-z */
    private static boolean isCommandChar(char c) {
        return c == '*'
                || ('0' <= c && c <= '9')
                || ('A' <= c && c <= 'Z')
                || ('a' <= c && c <= 'z');
    }

    /** @return whether {@code c} terminates a run of constant text */
    private static boolean isSpecial(char c) {
        return c == '{' || c == '}' || c == '\\' || c == '[' || c == ']' || c == '$';
    }
}
import java.util.*;
/**
 * Recursive-descent converter from a LaTeX token stream to HTML.
 *
 * <p>{@link #main(String[])} reads LaTeX from standard input, lexes it and
 * prints the converted result. All state is static: {@link #tokens} is the
 * remaining input (next token on top) and {@link #counters} holds the
 * sectioning counters.
 *
 * <p>Parsing is best effort: when the input ends unexpectedly (unterminated
 * math, missing argument, missing {@code \end}), the helpers stop at the end
 * of the token stream instead of throwing {@code EmptyStackException}.
 */
public class Parser {
    /** Assigned in {@link #main(String[])}; not otherwise used in this class. */
    public static Stack<Token> read;
    /** Remaining tokens; the top of the stack is the next token to parse. */
    public static Stack<Token> tokens;
    /** Token type tag for commands. */
    public static final int CMD = 1;
    /** Token type tag for constant text. */
    public static final int CONST = 0;
    /** The {@code \end} command, which terminates an environment body. */
    public static final Token ENVEND = new Token("end",1);
    /** The closing brace, which terminates a braced argument. */
    public static final Token CMDEND = new Token("}",1);
    /** Sectioning counters used to number headings. */
    public static Counter counters;

    /** Names of environments whose content is passed through verbatim as math. */
    private static final Set<String> MATH_ENVIRONMENTS = new HashSet<String>(Arrays.asList(
            "align","align*","alignat","alignat*","aligned","alignedat","array",
            "Bmatrix","bmatrix", "cases", "eqnarray", "eqnarray*", "equation",
            "equation*","gather","gather*","gathered","matrix","multline","multline*",
            "pmatrix","smallmatrix","split","subarray","Vmatrix","vmatrix"));

    /**
     * Reads LaTeX from standard input and prints the converted output.
     *
     * @param args ignored
     */
    public static void main(String[] args){
        counters = new Counter();
        read = new Stack<Token>();
        Stack<Character> tex = new Stack<Character>();
        Scanner in = new Scanner(System.in);
        StringBuilder s = new StringBuilder();
        // hasNext() looks for a non-whitespace token, so trailing blank lines are not read.
        while(in.hasNext()){
            s.append(in.nextLine()).append("\n");
        }
        // Push back to front so the first character of the input ends up on top.
        for(int i=s.length()-1;i>-1;i--){
            tex.push(s.charAt(i));
        }
        Lexer l = new Lexer(tex);
        tokens = l.t;
        // Type 4 is produced by no lexer path, so this extra terminator never matches.
        Tree parsed = parse(new Token("",4));
        System.out.println(parsed);
    }

    /**
     * Parses up to the next closing brace or {@code \end}.
     *
     * @return the parsed subtree
     */
    public static Tree parse(){
        return parse(CMDEND);
    }

    /**
     * Parses tokens until one equal to an element of {@code end} is found or
     * the input is exhausted. The terminating token is left on the stack.
     *
     * @param end tokens that stop the parse
     * @return a tree whose children are the parsed items
     */
    public static Tree parse(ArrayList<Token> end){
        Tree tree = new Tree("");
        while(!tokens.empty()){
            Token t = tokens.pop();
            if(end.contains(t)){
                // Leave the terminator for the caller to consume.
                tokens.push(t);
                return tree;
            }
            if(t.isCommand()){
                tree.addChild(command(t.token));
            }else{
                tree.addChild(new Tree(t.token));
            }
        }
        return tree;
    }

    /**
     * Parses until {@code end} or {@code \end}, whichever comes first.
     *
     * @param end additional terminating token
     * @return the parsed subtree
     */
    public static Tree parse(Token end){
        ArrayList<Token> ends = new ArrayList<Token>();
        ends.add(ENVEND);
        ends.add(end);
        return parse(ends);
    }

    /**
     * Converts one command whose token has just been popped.
     *
     * @param cmd command text without the leading backslash
     * @return the converted subtree; an unrecognised command yields a leaf
     *         containing a backslash followed by {@code cmd}
     */
    public static Tree command(String cmd){
        Tree t = new Tree("\\"+cmd);
        if(cmd.equals("textbf")){
            t = cmd_textFORMAT("strong");
        }else if(cmd.equals("texttt")){
            t = cmd_textFORMAT("tt");
        }else if(cmd.equals("emph")){
            t = cmd_textFORMAT("em");
        }else if(cmd.equals("textsuperscript")){
            t = cmd_textFORMAT("sup");
        }else if(cmd.equals("$")){
            t = cmd_inlinemath();
        }else if(cmd.equals("$$")){
            t = cmd_displaymath();
        }else if(cmd.equals("begin")){
            t = cmd_begin();
        }else if(cmd.equals("[")||cmd.equals("]")){
            t = new Tree(cmd);
        }else if(cmd.equals("}")){
            // A closing brace left behind by parse(): produces no output.
            t = new Tree("");
        }else if(cmd.equals("end")){
            t = cmd_end();
        }else if(cmd.equals("section")){
            t = cmd_section();
        }else if(cmd.equals("subsection")){
            t = cmd_subsection();
        }else if(cmd.equals("subsubsection")){
            t = cmd_subsubsection();
        }else if(cmd.equals("section*")){
            t = cmd_textFORMAT("h3");
        }else if(cmd.equals("subsection*")){
            t = cmd_textFORMAT("h4");
        }else if(cmd.equals("subsubsection*")){
            t = cmd_textFORMAT("h5");
        }
        return t;
    }

    /**
     * Numbered {@code \section}: increments the section counter, resets the
     * two lower levels and emits an {@code <h3>} heading.
     */
    public static Tree cmd_section(){
        counters.addToCounter("section", 1);
        counters.setCounter("subsection", 0);
        counters.setCounter("subsubsection", 0);
        return heading("h3", counters.value("section")+".");
    }

    /**
     * Numbered {@code \subsection}: increments the subsection counter, resets
     * the subsubsection counter and emits an {@code <h4>} heading.
     */
    public static Tree cmd_subsection(){
        counters.addToCounter("subsection", 1);
        counters.setCounter("subsubsection", 0);
        return heading("h4", counters.value("section")+"."+counters.value("subsection")+".");
    }

    /**
     * Numbered {@code \subsubsection}: increments its counter and emits an
     * {@code <h5>} heading.
     */
    public static Tree cmd_subsubsection(){
        counters.addToCounter("subsubsection", 1);
        return heading("h5", counters.value("section")+"."+counters.value("subsection")+"."+counters.value("subsubsection")+".");
    }

    /** Builds {@code <tag>number argument</tag>} from the next argument on the stack. */
    private static Tree heading(String tag, String number){
        Tree t = new Tree();
        t.addChild("<"+tag+">"+number);
        // addChild (not addAllChild) so a single-character argument, which is a leaf, is kept.
        t.addChild(parseParameter());
        t.addChild("</"+tag+">");
        return t;
    }

    /**
     * Handles {@code \begin}: reads the environment name, dispatches to the
     * matching environment handler and then consumes the closing
     * {@code \end} and its argument. Unknown environments are echoed with
     * their body parsed normally.
     */
    public static Tree cmd_begin(){
        Tree t = new Tree();
        Tree param = parseParameter();
        t.addChild("\\begin{");
        t.addAllChild(param);
        t.addChild("}");
        String env = param.toString();
        // Drop the closing brace that parseParameter() left on the stack.
        discardToken();
        if(env.equals("theorem")){
            t = env_theorem("<blockquote class='theorem'>","</blockquote>");
        }else if(env.equals("definition")){
            t = env_theorem("<blockquote class='definition'>","</blockquote>");
        }else if(env.equals("lemma")){
            t = env_theorem("<blockquote class='lemma'>","</blockquote>");
        }else if(env.equals("remark")){
            t = env_theorem("<blockquote class='remark'>","</blockquote>");
        }else if(env.equals("quotation")){
            t = env_theorem("<blockquote class='quotation'>","</blockquote>");
        }else if(env.equals("problem")){
            t = env_theorem("<blockquote class='problem'>","</blockquote>");
        }else if(env.equals("proof")){
            t = env_theorem("<p><em>Proof .</em>"," Q.E.D.</p>");
        }else if(env.equals("enumerate")){
            t = env_enumerate();
        }else if(env.equals("itemize")){
            t = env_itemize();
        }else if(env.equals("document")){
            t = env_document();
        }else if(isMathEnv(env)){
            t = env_MATHIGNORE(env);
        }else{
            t.addAllChild(parse());
            t.addChild("\\end{");
            t.addAllChild(param);
            t.addChild("}");
        }
        // Remove the \end command, then its {name} argument.
        discardToken();
        cmd_end();
        return t;
    }

    /**
     * Handles {@code \end}: consumes its argument and produces no output.
     */
    public static Tree cmd_end(){
        parseParameter();
        return new Tree("");
    }

    /**
     * Inline math: copies tokens verbatim up to the closing {@code $} (or the
     * end of input) and wraps them in {@code $...$}.
     */
    public static Tree cmd_inlinemath(){
        return mathUntil("$", "$", "$");
    }

    /**
     * Display math: copies tokens verbatim up to the closing {@code $$} (or
     * the end of input) and wraps them in {@code \[...\]}.
     */
    public static Tree cmd_displaymath(){
        return mathUntil("$$", "\\[", "\\]");
    }

    /** Copies raw tokens until the {@code delimiter} command, wrapping them in {@code open}/{@code close}. */
    private static Tree mathUntil(String delimiter, String open, String close){
        Tree t = new Tree("");
        t.addChild(open);
        while(!tokens.empty() && !tokens.peek().isCommand(delimiter)){
            t.addChild(plain(tokens.pop()));
        }
        // Consume the closing delimiter when there is one.
        discardToken();
        t.addChild(close);
        return t;
    }

    /**
     * Restores the source text of a token: commands get their backslash
     * back, while braces, brackets, dollar delimiters and constant text are
     * returned unchanged.
     *
     * @param t token to render
     * @return the token as LaTeX source
     */
    public static String plain(Token t){
        if(t.isCommand()&&!t.token.equals("{")&&!t.token.equals("}")&&!t.token.equals("$")&&!t.token.equals("$$")&&!t.token.equals("[")&&!t.token.equals("]")){
            return "\\"+t.token;
        }
        return t.token;
    }

    /**
     * Wraps the next argument in the HTML element {@code s}.
     *
     * @param s element name, e.g. {@code strong}
     */
    public static Tree cmd_textFORMAT(String s){
        return cmd_textFORMAT("<"+s+">", "</"+s+">");
    }

    /**
     * Wraps the next argument between {@code pre} and {@code suf}.
     *
     * @param pre text emitted before the argument
     * @param suf text emitted after the argument
     */
    public static Tree cmd_textFORMAT(String pre, String suf){
        Tree t = new Tree("");
        t.addChild(new Tree(pre));
        t.addChild(parseParameter());
        t.addChild(new Tree(suf));
        return t;
    }

    /**
     * Reads an optional {@code [...]} argument.
     *
     * <p>A whitespace-only constant in front of the bracket is consumed. If
     * non-blank text comes first, or no bracket follows, {@code def} is
     * returned.
     *
     * @param def value returned when no optional argument is present
     * @return the content of the brackets, or {@code def}
     */
    public static Tree parseOptionalParameter(Tree def){
        if(!tokens.empty() && tokens.peek().isConstant()){
            Token to = tokens.pop();
            String s = ltrim(to.token);
            if(s.length()!=0){
                // Real text, not an optional argument: put it back untouched.
                tokens.push(to);
                return def;
            }
        }
        if(!tokens.empty() && tokens.peek().isCommand("[")){
            tokens.pop();
            Tree t = new Tree("");
            while(!tokens.empty() && !tokens.peek().isCommand("]")){
                if(tokens.peek().isCommand("{")){
                    tokens.pop();
                    t.addAllChild(parse(CMDEND));
                }else{
                    t.addChild(plain(tokens.pop()));
                }
            }
            // Consume the closing bracket when there is one.
            discardToken();
            return t;
        }
        return def;
    }

    /**
     * Reads one mandatory argument.
     *
     * <p>If the next token is constant text with non-blank content, the
     * argument is its first non-whitespace character and the remainder is
     * pushed back. Otherwise one token (expected to be the opening brace) is
     * dropped and everything up to the closing brace or {@code \end} is
     * parsed; that terminator stays on the stack.
     *
     * @return the argument subtree
     */
    public static Tree parseParameter(){
        if(!tokens.empty() && tokens.peek().isConstant()){
            String s = ltrim(tokens.pop().token);
            if(s.length()!=0){
                tokens.push(new Token(s.substring(1),CONST));
                return new Tree(String.valueOf(s.charAt(0)));
            }
        }
        // Either whitespace was skipped or the next token is already the opening brace.
        discardToken();
        return parse(CMDEND);
    }

    /**
     * Theorem-like environment: emits {@code pre}, the optional argument in
     * parentheses (when present), the body up to {@code \end}, then {@code suf}.
     *
     * @param pre text emitted before the body
     * @param suf text emitted after the body
     */
    public static Tree env_theorem(String pre,String suf){
        Tree t = new Tree("");
        Tree def = new Tree("");
        t.addChild(pre);
        Tree optional = parseOptionalParameter(def);
        if(!optional.isLeaf()){
            t.addChild("(");
            t.addAllChild(optional);
            t.addChild(")");
        }
        t.addAllChild(parse(ENVEND));
        t.addChild(suf);
        return t;
    }

    /**
     * {@code document} environment: the body up to {@code \end} wrapped in
     * {@code <article>}.
     */
    public static Tree env_document(){
        Tree t = new Tree("");
        t.addChild("<article>");
        // The body is an environment body, not a braced argument: parse to \end.
        t.addAllChild(parse(ENVEND));
        t.addChild("</article>");
        return t;
    }

    /** {@code enumerate} environment rendered as an ordered list. */
    public static Tree env_enumerate(){
        return list("<ol>\n", "</ol>\n");
    }

    /** {@code itemize} environment rendered as an unordered list. */
    public static Tree env_itemize(){
        return list("<ul>\n", "</ul>\n");
    }

    /** Wraps the items of the current list environment between {@code open} and {@code close}. */
    private static Tree list(String open, String close){
        Tree t = new Tree();
        t.addChild(new Tree(open));
        t.addAllChild(item_helper());
        t.addChild(new Tree(close));
        return t;
    }

    /**
     * Math environment: copies the body verbatim between
     * {@code \begin{env}} and {@code \end{env}}. An {@code \end} belonging
     * to a different environment is copied as part of the body. The matching
     * {@code \end} is left on the stack for the caller.
     *
     * @param env environment name
     */
    public static Tree env_MATHIGNORE(String env){
        Tree t = new Tree();
        t.addChild("\\begin{"+env+"}");
        while(true){
            while(!tokens.empty() && !tokens.peek().equals(ENVEND)){
                t.addChild(plain(tokens.pop()));
            }
            if(tokens.empty()){
                // Input ended before any \end: emit what was collected.
                break;
            }
            Token to = tokens.pop();
            if(tokens.empty()){
                // A bare \end is the last token: hand it back to the caller.
                tokens.push(to);
                break;
            }
            Token to2 = tokens.pop();
            if(tokens.empty()||tokens.peek().isConstant(env)){
                // This \end closes our environment: restore both tokens and stop.
                tokens.push(to2);
                tokens.push(to);
                break;
            }else{
                t.addChild(plain(to));
                t.addChild(plain(to2));
            }
        }
        t.addChild("\\end{"+env+"}");
        return t;
    }

    /**
     * Parses the {@code \item} entries of a list environment. Constant text
     * in front of the first item is discarded; each item body is trimmed and
     * wrapped in {@code <li>}.
     */
    public static Tree item_helper(){
        Tree t = new Tree("");
        while(!tokens.empty() && tokens.peek().isConstant()){
            tokens.pop();
        }
        while(!tokens.empty() && tokens.peek().isCommand("item")){
            tokens.pop();
            t.addChild("<li>\n");
            // An item runs until the next \item or the closing \end.
            t.addChild((parse(new Token("item",1))).toString().trim());
            t.addChild("\n</li>\n");
        }
        return t;
    }

    /**
     * @param s input string
     * @return {@code s} without its leading whitespace
     */
    public static String ltrim(String s) {
        int i = 0;
        while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
            i++;
        }
        return s.substring(i);
    }

    /**
     * @param s environment name
     * @return {@code true} when {@code s} is one of the known math environments
     */
    public static boolean isMathEnv(String s){
        return MATH_ENVIRONMENTS.contains(s);
    }

    /** Pops and drops the next token, doing nothing when the input is exhausted. */
    private static void discardToken(){
        if(!tokens.empty()){
            tokens.pop();
        }
    }
}
import java.util.*;
/**
 * A simple ordered tree of strings.
 *
 * <p>A node without children is a leaf and renders as its own data; a node
 * with children renders as the concatenation of its children, ignoring its
 * own data.
 */
public class Tree {
    /** Child nodes in insertion order. */
    private List<Tree> children = new ArrayList<Tree>();
    /** Text of this node; only rendered while the node is a leaf. */
    private String data;

    /**
     * @param data text of the new leaf
     */
    public Tree(String data) {
        this.data = data;
    }

    /** Creates a leaf with empty text. */
    public Tree() {
        this.data = "";
    }

    /**
     * @param i child index
     * @return the child at position {@code i}
     */
    public Tree getChild(int i) {
        return children.get(i);
    }

    /**
     * Replaces the child at position {@code i}.
     *
     * @param i child index
     * @param t new child
     */
    public void setChild(int i, Tree t) {
        children.set(i, t);
    }

    /** @return {@code true} when this node has no children */
    public boolean isLeaf() {
        return children.isEmpty();
    }

    /**
     * Appends {@code t} as the last child.
     *
     * @param t subtree to append
     */
    public void addChild(Tree t) {
        children.add(t);
    }

    /**
     * Appends a new leaf holding {@code s} as the last child.
     *
     * @param s text of the new leaf
     */
    public void addChild(String s) {
        children.add(new Tree(s));
    }

    /**
     * Returns the live child list of this node.
     *
     * @param t ignored
     * @return this node's children
     */
    public List<Tree> getChildren(Tree t) {
        return children;
    }

    /**
     * Appends every child of {@code t} to this node. The data of {@code t}
     * itself is not copied, so passing a leaf adds nothing.
     *
     * @param t node whose children are appended
     */
    public void addAllChild(Tree t) {
        // Copy first: when t is this node, appending while iterating the live list would never finish.
        List<Tree> snapshot = new ArrayList<Tree>(t.getChildren(t));
        children.addAll(snapshot);
    }

    /**
     * @return the node's data for a leaf, otherwise the concatenation of all
     *         children rendered recursively
     */
    @Override
    public String toString() {
        if (isLeaf()) {
            return data;
        }
        StringBuilder s = new StringBuilder();
        for (Tree child : children) {
            s.append(child.toString());
        }
        return s.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment