Skip to content

Instantly share code, notes, and snippets.

@jcheype
Created August 2, 2013 16:53
Show Gist options
  • Save jcheype/6141464 to your computer and use it in GitHub Desktop.
Save jcheype/6141464 to your computer and use it in GitHub Desktop.
package com.vidal.bo.monoviewer.util;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Building blocks for a token-based HTML diff.
 *
 * <p>As visible in this file, the class can split an HTML string into tokens
 * (tags, whitespace runs and words) and declares a {@link Match} value type.
 * {@code findMatch} is only a stub and performs no matching yet.
 *
 * @author jcheype
 * @since 02/08/13
 */
public class HtmlDiff {

    /** Tokenizer state: the kind of token currently being accumulated. */
    enum Mode {
        CHAR,
        TAG,
        WHITESPACE
    }

    /**
     * A run of {@code length} tokens starting at {@code startInBefore} in one
     * token list and at {@code startInAfter} in the other. The {@code end*}
     * fields are the inclusive index of the last token of the run.
     */
    static class Match {
        final int startInBefore;
        final int startInAfter;
        final int length;
        final int endInBefore;
        final int endInAfter;

        Match(int startInBefore, int startInAfter, int length) {
            this.startInBefore = startInBefore;
            this.startInAfter = startInAfter;
            this.length = length;
            // Inclusive end indices, hence the "- 1".
            this.endInBefore = startInBefore + length - 1;
            this.endInAfter = startInAfter + length - 1;
        }
    }

    /** Initial capacity of the per-token buffer. */
    private static final int WORD_CAPACITY = 32;

    /** A single tag, optionally surrounded by whitespace. Compiled once and shared. */
    private static final Pattern TAG_PATTERN = Pattern.compile("^\\s*<[^>]+>\\s*$");

    private boolean isEndOfTag(char c) {
        return c == '>';
    }

    private boolean isStartOfTag(char c) {
        return c == '<';
    }

    private boolean isWhitespace(char c) {
        return Character.isWhitespace(c);
    }

    /** Returns {@code true} when {@code token} consists of exactly one tag. */
    private boolean isTag(String token) {
        return TAG_PATTERN.matcher(token).matches();
    }

    /**
     * Flushes {@code currentWord} into {@code words} when it is non-empty.
     *
     * @return a fresh empty buffer if a word was flushed, otherwise the
     *         (still empty) {@code currentWord} itself
     */
    private StringBuilder pushWord(StringBuilder currentWord, List<String> words) {
        if (currentWord.length() > 0) {
            words.add(currentWord.toString());
            return new StringBuilder(WORD_CAPACITY);
        }
        return currentWord;
    }

    /**
     * Splits {@code html} into tokens. Concatenating the tokens in order
     * reproduces the input exactly.
     *
     * <ul>
     *   <li>everything from {@code '<'} up to and including the next
     *       {@code '>'} is one token;</li>
     *   <li>a run of consecutive whitespace characters is one token;</li>
     *   <li>in text, any character that is not a letter (digit, punctuation, ...)
     *       starts a new token, which following letters then extend.</li>
     * </ul>
     *
     * @param html the markup to tokenize; must not be {@code null}
     * @return the tokens in document order (empty list for an empty string)
     */
    private List<String> htmlToTokens(String html) {
        List<String> words = new ArrayList<String>(2048);
        Mode mode = Mode.CHAR;
        StringBuilder currentWord = new StringBuilder(WORD_CAPACITY);
        for (int i = 0; i < html.length(); i++) {
            char c = html.charAt(i);
            switch (mode) {
                case TAG:
                    currentWord.append(c);
                    if (isEndOfTag(c)) {
                        words.add(currentWord.toString());
                        currentWord = new StringBuilder(WORD_CAPACITY);
                        mode = Mode.CHAR;
                    }
                    break;
                case CHAR:
                    if (isStartOfTag(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.TAG;
                    } else if (isWhitespace(c)) {
                        currentWord = pushWord(currentWord, words);
                        // Fix: this used to switch to TAG, which made the
                        // WHITESPACE state unreachable and merged the
                        // whitespace with all following text up to the next '>'.
                        mode = Mode.WHITESPACE;
                    } else if (!Character.isLetter(c)) {
                        currentWord = pushWord(currentWord, words);
                    }
                    currentWord.append(c);
                    break;
                case WHITESPACE:
                    if (isStartOfTag(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.TAG;
                    } else if (!isWhitespace(c)) {
                        currentWord = pushWord(currentWord, words);
                        mode = Mode.CHAR;
                    }
                    currentWord.append(c);
                    break;
            }
        }
        // Flush whatever is left (trailing word, whitespace or unterminated tag).
        pushWord(currentWord, words);
        return words;
    }

    /**
     * NOTE(review): unfinished stub. It only initializes three locals and
     * returns; no matching is performed and nothing is reported to the caller.
     */
    private void findMatch(int beforeTokens,
                           int afterTokens,
                           int indexOfBeforeLocationsInAfterTokens,
                           int startInBefore,
                           int endInBefore,
                           int startInAfter,
                           int endInAfter) {
        int bestMatchInBefore = startInBefore;
        int bestMatchInAfter = startInAfter;
        int bestMatchLength = 0;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment